ALib C++ Library
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
fscanner.inl
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_files of the \aliblong.
4///
5/// \emoji :copyright: 2013-2025 A-Worx GmbH, Germany.
6/// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace files {
9
10#if ALIB_DEBUG
11/// The format string used with verbose logging to domain <c>/ALIB/FILES/SCAN</c> during
12/// scans performed with namespace function \alib{files;ScanFiles}.<br>
13/// Defaults to <c>" {:ta h{2,r} on{10,r} gn{10,r} s(IEC){10,r} dm qqq nf l}"</c>
15#endif
16
17
18/// Input parameters to function #ScanFiles.
20{
21 /// Options for processing symbolic links.
22 enum class SymbolicLinks
23 {
24 DONT_RESOLVE = 0, ///< Demands \b not to resolve symbolic links in any way.
25 RESOLVE_BUT_DONT_FOLLOW = 1, ///< Demands to read symbolic links, but not follow linked directories.
26 ///< FInfo dates, sizes, and access rights are set according to
27 ///< the link target.
28 RECURSIVE = 2, ///< Read symbolic links and in case they are targeting a
29 ///< directory, recurse into, if this directory meets the
30 ///< other constraints associated with the current scan.
31 };
32
33 /// Denotes 'infinite' recursion if set to field #MaxDepth.
34 static constexpr unsigned InfiniteRecursion = (std::numeric_limits<unsigned>::max)();
35
36 /// The path to be scanned.
38
39 /// Denotes how symbolic links are treated.
41
42 /// The maximum recursion depth. Defaults to #InfiniteRecursion.
44
45 /// If \c true, the default, scanning does not stop recursion on directories which represent
46 /// a mounted filesystem. If \c false, the search is restricted to the device that #StartPath
47 /// resides in.
48 bool CrossFileSystems = true;
49
50 /// If \c false (the default), scanning aborts if \e 'artificial' filesystems are found.
51 /// Artificial filesystems under GNU/Linux, are for example:
52 /// <c>/proc</c>, <c>/dev</c>, <c>/run</c>, <c>/sys</c>, and <c>/temp</c>.
53 bool IncludeArtificialFS = false;
54
55 /// If \c false, empty directories remain in the result tree. Otherwise, they are deleted
56 /// and do not appear in the tree.
58
59 /// If set (not containing \c nullptr), files are passed to this filter and removed if \c false
60 /// is returned.<br>
61 /// The term "files" here means all sorts of files except Directories.
62 /// Directories are either real directories, or in case the field #LinkTreatment is set to
63 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, symbolic links that
64 /// target a directory.
65 ///
66 /// \see Optional filters #DirectoryFilterPreRecursion and #DirectoryFilterPostRecursion.
68
69 /// If set (not containing \c nullptr), this filter is invoked \b after a recursive scan of
70 /// a directory. If \c false is returned, the scanned contents of the directory are removed
71 /// from the result, but the (empty) directory itself remains in the result list, if field
72 /// #RemoveEmptyDirectories evaluates to \c false.<br>
73 /// Note that in case field #LinkTreatment is set to
74 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, this filter
75 /// is also applied to symbolic links, which are readable, not broken, and target a directory.
76 ///
77 /// \note
78 /// Directories (and symbolic links to directories) are first recursively scanned before this
79 /// filter is applied. On deletion, of course the whole scanned subtree is deleted.
80 /// This allows filtering directories, depending on information available only after
81 /// scanning, hence by the numbers retrieved with \alib{files;FInfo::Sums}.
82 /// To increase performance and filter directories \e before their recursive scan,
83 /// alternative field #DirectoryFilterPreRecursion is to be used.
84 ///
85 /// \see Optional filters #DirectoryFilterPreRecursion and #FileFilter.
86 ///
88
89 /// Same as #DirectoryFilterPostRecursion but is used \b before a recursive scan of
90 /// a directory. Consequently, this filter leads to much higher scan performance than the
91 /// alternative version, because huge branches of the file system might be omitted during scan.
92 /// However, the numbers retrieved with \alib{files;FInfo::Sums} will all indicate
93 /// \c 0, because no information is retrieved.<br>
94 /// If a directory is "pruned" due to this filter, the entry still occurs in the \b %FTree,
95 /// unless field #RemoveEmptyDirectories evaluates to \c true.<br>
96 ///
97 /// \see Optional filters #DirectoryFilterPostRecursion and #FileFilter.
98 ///
100
101 /// Constructor accepting all features.
102 /// @param startPath Stored in field #StartPath.
103 /// @param linkTreatment Stored in field #LinkTreatment. Defaults to \b SymbolicLinks::RECURSIVE.
104 /// @param maxDepth Stored in field #MaxDepth. Defaults to #InfiniteRecursion.
105 /// @param crossFileSystems Stored in field #CrossFileSystems. Defaults to \c true.
106 /// @param includeArtificialFS Stored in field #IncludeArtificialFS. Defaults to \c false.
109 unsigned maxDepth = InfiniteRecursion,
110 bool crossFileSystems = true,
111 bool includeArtificialFS= false )
112 : StartPath (startPath )
113 , LinkTreatment (linkTreatment )
114 , MaxDepth (maxDepth )
115 , CrossFileSystems (crossFileSystems )
116 , IncludeArtificialFS(includeArtificialFS) {}
117
118}; // struct ScanParameters
119
120/// A simple triple of a path string, a corresponding \b FTree node, and a boolean to indicate
121/// whether the path existed already. This struct is used as an output parameter of function
122/// #ScanFiles.
124{
125 system::Path RealPath; ///< The 'real' absolute path to the node (no symbolic links included).
126 FTree::Cursor Node; ///< The node in the tree representing this path.
127 bool Existed; ///< If \c true, the complete path existed already in the target
128 ///< \alib{files;FTree}. However, it might have existed as
129 ///< another result path, hence was not scanned in a recursive fashion,
130 ///< and is now rescanned with 'higher' scan quality.
131
132 /// Constructor.
133 /// @param realPath Stored in field #RealPath.
134 /// @param node Stored in field #Node.
135 /// @param existed Stored in field #Existed.
136 ResultsPaths(const system::PathString& realPath, FTree::Cursor node, bool existed )
137 : RealPath(realPath)
138 , Node (node)
139 , Existed (existed) {}
140};
141
142#if DOXYGEN
143/// ### General Information ###
144/// Scans the filesystem according to the given \b ScanParameters and adds \alib{files;FInfo}
145/// entries to the given \alib{files;FTree}.
146///
147/// ### ALib FTree Data Contract ###
148/// This function has a contract with the class \alib{files;FTree} that is used to store the scan
149/// results.
150/// This contract states that any file or directory found during a scan is always stored using
151/// the <em>"Real Path"</em> of the entry. This means that any symbolic link is resolved.
152/// The consequences are:
153/// - %Files and directories which represent a symbolic link are always "leaf nodes".
154/// (They never contain child nodes.). However, their symlink target path is attached twice
155/// to the entry:
156/// 1. The original link information given, which often uses relative path addressing.
157/// 2. The absolute, <em>"Real Path"</em> of the target, which has a corresponding result entry
158/// in the given \b %FTree.
159/// - If a using software wants to use symbolic paths, for example, to present them to the end
160/// user, such paths have to be assembled by the user's code in own responsibility.
161/// All information for doing this is provided in the resulting tree object
162/// - Doubly linked target files and directories are never a problem for this scanner. Each
163/// file is scanned only once. This especially prevents all sorts of problems that would otherwise
164/// occur with cyclic symbolic links.
165/// - Due to this, even the given start path of a search might not be found as a result
166/// in the given \b %FTree, because also start paths are converted to a <em>Real Path</em>.
167/// - The scan result may contain more than one resulting path. This happens, if a symbolic link
168/// targets a file or directory that is not recursively included in the start path.
169/// The resulting <em>"Real Path"</em> of the given start path is however always the first
170/// result added.
171///
172/// The latter is reflected with parameter \p{resultPaths} of this function, which is defined
173/// as a <c>std::vector</c>.
174///
175/// \note
176/// As class \alib{files;FTree} is based on class \alib{containers;StringTree}, using code
177/// is enabled to break this contract by adding entries below symbolic links.
178/// Other entities of this \alibmod_nl will not break this contract.
179///
180/// ### Rescanning of Entries ###
181/// Existing entries in the given \p{tree} are not overwritten. They might be scanned with "higher"
182/// \alib{files;FInfo::Qualities} values, depending on given \p{parameters} and how they had been
183/// scanned before. If the same "level" of scanning is provided, existing entries will not be
184/// scanned again. If a rescan of a certain path is wanted, then the target entry of that path has
185/// to be deleted before invoking this function. Due to the implementation of class FTree, repeated
186/// delete and scan operations will not cause any heap-memory activities (of course, as long as no
187/// new entries are detected which have been created meanwhile).
188///
189/// ### Platform-Dependent Code Selection ###
190/// File scanning is a platform-dependent task and hence \b ALib uses one of two different
191/// implementations:
192/// 1. A posix version for posix compatible OSes,
193/// 2. A version that relies on <c>C++ std::filesystem</c>.
194///
195/// The fallback version using <c>std::filesystem</c> has the following restrictions:
196/// - The only time attribute available is the \alib{files;FInfo::MDate;modification time} of
197/// an entry. Fields \alib{files::FInfo;BDate}, \alib{files::FInfo;CDate}, and
198/// \alib{files::FInfo;ADate} are always set to the same as the
199/// modification time, even on filesystems that support the other values.
200/// - The file time of symbolic links is \b always that of the target file. The C++ standard has
201/// no possibility to access the link's time itself.
202/// - The file time of broken symbolic links is set to the current time (time of scanning).
203/// - The size that directories occupy on disk cannot be determined.
204/// Directory entries always report size <c>0</c>.
205/// - The target of a symbolic link which points to a non-accessible directory, cannot be resolved
206/// to a 'real' path, even if all other path components before are accessible.
207/// (This is true for the implementation of the standard library under GNU/Linux and Clang compiler
208/// at the time of writing this, 2024/02.)
209/// - Flag \alib{files;ScanParameters::CrossFileSystems} is ignored. Crossing Filesystems cannot
210/// be detected using purely the standard library.
211/// - A file's owner and owning group are not determined. Instead, \alib{files;FInfo::UnknownID} is
212/// set for both.
213/// - The scanning process is half as fast as in the Posix version. The reason for this is probably
214/// the internal allocation and deallocation of many quite volatile string objects in the C++
215/// standard library.
216/// Well, but it is still fast though!
217///
218/// \note As for today, using this module under WindowsOS, will fall back to the
219/// <em>C++ std::filesystem</em> version. It may be that a future version will provide a
220/// native implementation for this target system. Volunteers from the community are welcome to
221/// contribute.
222///
223/// @param tree The tree to fill.
224/// @param parameters The input parameters to determine the scan process.
225/// @param resultPaths A container to add the resulting list of 'real' paths and corresponding start
226/// nodes found during the search. The first entry added by this function is
227/// always the 'real'-version of field \alib{files::ScanParameters;StartPath}
228/// of the given \p{parameters} struct. Further path/node pairs are created when
229/// symbolic links are found and followed.
230/// @param lock Pointer to an (optional) \alib{threads;SharedLock}.
231/// The overloaded version of this function that accepts \alib{SharedFTree}
232/// sets this to the instance found in the shared tree.<br>
233/// This parameter is available (and to be passed) only if the module
234/// \alib_threads is included in the \alibbuild.
235///
236/// @return Scan quality code of the tree node of the first resulting path, hence of the node
237/// referred to by \alib{files;ScanParameters::StartPath}.<br>
238/// On error, i.e., if the start path was invalid, not accessible, a broken link, a circular
239/// link, or similar, \alib{files::FInfo;Qualities;Qualities::NOT_EXISTENT} is
240/// returned.
243 ScanParameters& parameters,
244 std::vector<ResultsPaths>& resultPaths,
245 SharedLock* lock );
246#else
249 ScanParameters& parameters,
250 std::vector<ResultsPaths>& resultPaths
251 IF_ALIB_THREADS( , SharedLock* lock) );
252#endif
253
254/// Invokes #ScanFiles( FTree&, ScanParameters&, std::vector<ResultsPaths>&,SharedLock*)
255/// passing the lock included in the given \alib{SharedFTree} as parameter \p{lock}.
256///
257/// @param tree The shared tree to fill.
258/// @param parameters The input parameters to determine the scan process.
259/// @param resultPaths The result paths.
260/// @return Scan quality code of the first resulting path.
261inline
263 ScanParameters& parameters,
264 std::vector<ResultsPaths>& resultPaths )
265{ return ScanFiles(*tree,parameters,resultPaths IF_ALIB_THREADS(, &tree.GetLock()) ); }
266
267} // namespace alib[::files]
268
269
270/// Type alias in namespace \b alib.
272
273/// Type alias in namespace \b alib.
275
276} // namespace [alib]
Qualities
Per-entry information about how a node was scanned.
Definition finfo.inl:123
TLock & GetLock() const noexcept
#define ALIB_DLL
Definition alib.inl:503
#define IF_ALIB_THREADS(...)
Definition alib.inl:401
#define ALIB_EXPORT
Definition alib.inl:497
ALIB_DLL FInfo::Qualities ScanFiles(FTree &tree, ScanParameters &parameters, std::vector< ResultsPaths > &resultPaths, SharedLock *lock)
String DBG_FILES_SCAN_VERBOSE_LOG_FORMAT
std::shared_ptr< FFilter > SPFileFilter
A shared pointer to a filter.
Definition ffilter.inl:44
strings::TString< PathCharType > PathString
The string-type used with this ALib Module.
Definition path.inl:33
files::ScanParameters ScanParameters
Type alias in namespace alib.
Definition fscanner.inl:271
threads::SharedLock SharedLock
Type alias in namespace alib.
files::TSharedFTree< SharedLock > SharedFTree
Type alias in namespace alib.
Definition ftree.inl:992
system::Path Path
Type alias in namespace alib.
Definition path.inl:376
files::ResultsPaths ResultsPaths
Type alias in namespace alib.
Definition fscanner.inl:274
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2189
ResultsPaths(const system::PathString &realPath, FTree::Cursor node, bool existed)
Definition fscanner.inl:136
system::Path RealPath
The 'real' absolute path to the node (no symbolic links included).
Definition fscanner.inl:125
FTree::Cursor Node
The node in the tree representing this path.
Definition fscanner.inl:126
Input parameters to function ScanFiles.
Definition fscanner.inl:20
unsigned MaxDepth
The maximum recursion depth. Defaults to InfiniteRecursion.
Definition fscanner.inl:43
SPFileFilter DirectoryFilterPreRecursion
Definition fscanner.inl:99
static constexpr unsigned InfiniteRecursion
Denotes 'infinite' recursion if set to field MaxDepth.
Definition fscanner.inl:34
SymbolicLinks LinkTreatment
Denotes how symbolic links are treated.
Definition fscanner.inl:40
SymbolicLinks
Options for processing symbolic links.
Definition fscanner.inl:23
@ DONT_RESOLVE
Demands not to resolve symbolic links in any way.
Definition fscanner.inl:24
Path StartPath
The path to be scanned.
Definition fscanner.inl:37
SPFileFilter DirectoryFilterPostRecursion
Definition fscanner.inl:87
ScanParameters(const system::PathString &startPath, SymbolicLinks linkTreatment=SymbolicLinks::RECURSIVE, unsigned maxDepth=InfiniteRecursion, bool crossFileSystems=true, bool includeArtificialFS=false)
Definition fscanner.inl:107