ALib C++ Library
Library Version: 2510 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
fscanner.inl
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header-file is part of module \alib_files of the \aliblong.
4///
5/// \emoji :copyright: 2013-2025 A-Worx GmbH, Germany.
6/// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8ALIB_EXPORT namespace alib { namespace files {
9
10#if ALIB_DEBUG
11/// The format string used with verbose logging to domain <c>/ALIB/FILES/SCAN</c> during
12/// scans performed with namespace function \alib{files;ScanFiles}.<br>
13/// Defaults to <c>" {:ta h{2,r} on{10,r} gn{10,r} s(IEC){10,r} dm qqq nf l}"</c>
15#endif
16
17
18/// Input parameters to function #ScanFiles.
20{
21 /// Options for processing symbolic links.
22 enum class SymbolicLinks
23 {
24 DONT_RESOLVE = 0, ///< Demands \b not to resolve symbolic links in any way.
25 RESOLVE_BUT_DONT_FOLLOW = 1, ///< Demands to read symbolic links, but not follow linked directories.
26 ///< FInfo dates, sizes and access rights are set according to
27 ///< the link target.
28 RECURSIVE = 2, ///< Read symbolic links and in case they are targeting a
29 ///< directory, recurse into, if this directory meets the
30 ///< other constraints associated with the current scan.
31 };
32
33 /// Denotes 'infinite' recursion if set to field #MaxDepth.
34 static constexpr unsigned int InfiniteRecursion = (std::numeric_limits<unsigned int>::max)();
35
36 /// The path to be scanned.
38
39 /// Denotes how symbolic links are treated.
41
42 /// The maximum recursion depth. Defaults to #InfiniteRecursion.
44
45 /// If \c true, the default, scanning does not stop recursion on directories which represent
46 /// a mounted filesystem. If \c false, the search is restricted to the device that #StartPath
47 /// resides in.
48 bool CrossFileSystems = true;
49
50 /// If \c false (the default), scanning aborts if \e 'artificial' filesystems are found.
51 /// Artificial filesystems under GNU/Linux, are for example:
52 /// <c>/proc</c>, <c>/dev</c>, <c>/run</c>, <c>/sys</c> and <c>/temp</c>.
53 bool IncludeArtificialFS = false;
54
55 /// If \c false, empty directories remain in the result tree. Otherwise they are deleted
56 /// and do not appear in the tree.
58
59 /// If set (not containing \c nullptr), files are passed to this filter and removed if \c false
60 /// is returned.<br>
61 /// The term "files" here means all sort of files except Directories.
62 /// Directories are either real directories, or in case field #LinkTreatment is set to
63 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, symbolic links that
64 /// target a directory.
65 ///
66 /// \see Optional filters #DirectoryFilterPreRecursion and #DirectoryFilterPostRecursion.
68
69 /// If set (not containing \c nullptr), this filter is invoked \b after a recursive scan of
70 /// a directory. If \c false is returned, the recursion is not performed, but the (empty)
71 /// directory remains in the result list, if field #RemoveEmptyDirectories evaluates to
72 /// \c false.<br>
73 /// Note that in case field #LinkTreatment is set to
74 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, this filter
75 /// is also applied to symbolic links, which are readable, not broken and target a directory.
76 ///
77 /// \note
78 /// Directories (and symbolic links to directories) are first recursively scanned before this
79 /// filter is applied. On deletion, of course the whole scanned subtree is deleted.
80 /// This allows filtering directories, depending on information available only after
81 /// scanning, hence by the numbers retrieved with \alib{files;FInfo::Sums}.
82 /// To increase performance and filter directories \e before their recursive scan,
83 /// alternative field #DirectoryFilterPreRecursion is to be used.
84 ///
85 /// \see Optional filters #DirectoryFilterPreRecursion and #FileFilter.
86 ///
88
89 /// Same as #DirectoryFilterPostRecursion but is used \b before a recursive scan of
90 /// a directory. Consequently, this filter leads to much higher scan performance than the
91 /// alternative version, because huge branches of the file system might be omitted during scan.
92 /// However, the numbers retrieved with \alib{files;FInfo::Sums} will all indicate
93 /// \c 0, because no information is retrieved.<br>
94 /// If a directory is "pruned" due to this filter, the entry still occurs in the \b %FTree,
95 /// unless field #RemoveEmptyDirectories evaluates to \c true.<br>
96 ///
97 /// \see Optional filters #DirectoryFilterPostRecursion and #FileFilter.
98 ///
100
101 /// Constructor accepting all features.
102 /// @param startPath Stored in field #StartPath.
103 /// @param linkTreatment Stored in field #LinkTreatment. Defaults to \b SymbolicLinks::RECURSIVE.
104 /// @param maxDepth Stored in field #MaxDepth. Defaults to #InfiniteRecursion.
105 /// @param crossFileSystems Stored in field #CrossFileSystems. Defaults to \c true.
106 /// @param includeArtificialFS Stored in field #IncludeArtificialFS. Defaults to \c false.
109 unsigned int maxDepth = InfiniteRecursion,
110 bool crossFileSystems = true,
111 bool includeArtificialFS = false )
112 : StartPath (startPath )
113 , LinkTreatment (linkTreatment )
114 , MaxDepth (maxDepth )
115 , CrossFileSystems (crossFileSystems )
116 , IncludeArtificialFS(includeArtificialFS)
117 {}
118
119}; // struct ScanParameters
120
121/// A simple triple of a path string, a corresponding \b FTree node and a boolean to indicate
122/// whether the path existed already. This struct is used as an output parameter of function
123/// #ScanFiles.
125{
126 system::Path RealPath; ///< The 'real' absolute path to the node (no symbolic links included).
127 FTree::Cursor Node; ///< The node in the tree representing this path.
128 bool Existed; ///< If \c true, the complete path existed already in the target
129 ///< \alib{files;FTree}. However, it might have existed as
130 ///< another result path, hence not in a recursively scanned fashion,
131 ///< and is now rescanned with 'higher' scan quality.
132
133 /// Constructor.
134 /// @param realPath Stored in field #RealPath.
135 /// @param node Stored in field #Node.
136 /// @param existed Stored in field #Existed.
137 ResultsPaths(const system::PathString& realPath, FTree::Cursor node, bool existed )
138 : RealPath(realPath)
139 , Node (node)
140 , Existed (existed)
141 {}
142};
143
144#if DOXYGEN
145/// ### General Information ###
146/// Scans the filesystem according to given \b ScanParameters and adds \alib{files;FInfo}
147/// entries to the given \alib{files;FTree}.
148///
149/// ### ALib FTree Data Contract ###
150/// This function has a contract with class \alib{files;FTree} that is used to store the scan results.
151/// This contract states that any file or directory found during a scan is always stored using
152/// the <em>"Real Path"</em> of the entry. This means that any symbolic link is resolved.
153/// The consequences are:
154/// - %Files and directories which represent a symbolic link are always "leaf nodes".
155/// (They never contain child nodes.) However, their symlink target path is attached twice
156/// to the entry:
157/// 1. The original link information given, which often uses relative path addressing.
158/// 2. The absolute, <em>"Real Path"</em> of the target, which has a corresponding result entry
159/// in the given \b %FTree.
160/// - If software using this function wants to use symbolic paths, for example, to present them
161/// to the end user, such paths have to be assembled by the user's code on its own responsibility.
162/// All information for doing this is provided in the resulting tree object.
163/// - Doubly linked target files and directories are never a problem for this scanner. Each
164/// file is scanned only once. This especially prevents all sorts of problems that would otherwise
165/// occur with cyclic symbolic links.
166/// - Due to this, even the given start path of a search might not be found as a result
167/// in given \b %FTree, because also start paths are converted to a <em>Real Path</em>.
168/// - The scan result may contain more than one resulting path. This happens, if a symbolic link
169/// targets a file or directory that is not recursively included in the start path.
170/// The resulting <em>"Real Path"</em> of the given start path is however always the first
171/// result added.
172///
173/// The latter is reflected with parameter \p{resultPaths} of this function, which is defined
174/// as a <c>std::vector</c>.
175///
176/// \note
177/// As class \alib{files;FTree} is based on class \alib{containers;StringTree}, using code
178/// is enabled to break this contract by adding entries below symbolic links.
179/// Other entities of this \alibmod_nl will not break this contract.
180///
181/// ### Rescanning of Entries ###
182/// Existing entries in the given \p{tree} are not overwritten. They might be scanned with "higher"
183/// \alib{files;FInfo::Qualities} values, depending on given \p{parameters} and how they had been
184/// scanned before. If the same "level" of scanning is provided, existing entries will not be
185/// scanned again. If a rescan of a certain path is wanted, then the target entry of that path has
186/// to be deleted before invoking this function. Due to the implementation of class FTree, repeated
187/// delete and scan operations will not cause any heap-memory activities (of course, as long as no
188/// new entries are detected).
189///
190/// ### Platform-Dependent Code Selection ###
191/// File scanning is a platform-dependent task and hence \b ALib uses one of two different
192/// implementations:
193/// 1. A posix version for posix compatible OSes,
194/// 2. A version that relies on <c>C++ std::filesystem</c>.
195///
196/// The fallback version using <c>std::filesystem</c> has the following restrictions:
197/// - The only time attribute available is the \alib{files;FInfo::MDate;modification time} of
198/// an entry. Fields \alib{files::FInfo;BDate}, \alib{files::FInfo;CDate}, and
199/// \alib{files::FInfo;ADate} are always set to the same as the
200/// modification time, even on filesystems that support the other values.
201/// - The file time of symbolic links is \b always that of the target file. The C++ standard has
202/// no possibility to access the link's time itself.
203/// - The file time of broken symbolic links is set to the current time (time of scanning).
204/// - The size that directories occupy on disk cannot be determined.
205/// Directory entries always report size <c>0</c>.
206/// - The target of a symbolic link which points to a non-accessible directory, cannot be resolved
207/// to a 'real' path, even if all other path components before are accessible.
208/// (This is true for the implementation of the standard library under GNU/Linux and Clang compiler
209/// at the time of writing this, 2024/02.)
210/// - Flag \alib{files;ScanParameters::CrossFileSystems} is ignored. Crossing Filesystems cannot
211/// be detected using purely the standard library.
212/// - A file's owner and owning group are not determined. Instead, \alib{files;FInfo::UnknownID} is
213/// set for both.
214/// - The scanning process is half as fast as in the Posix version. The reason for this is probably
215/// the internal allocation and deallocation of many quite volatile string objects in the C++
216/// standard library.
217/// Well, but it is still fast though!
218///
219/// \note As of today, using this module under WindowsOS will fall back to the
220/// <em>C++ std::filesystem</em> version. It may be that a future version will provide a native
221/// implementation for this target system. Volunteers from the community are welcome to
222/// contribute.
223///
224/// @param tree The tree to fill.
225/// @param parameters The input parameters to determine the scan process.
226/// @param resultPaths A container to add the resulting list of 'real' paths and corresponding start
227/// nodes found during the search. The first entry added by this function is
228/// always the 'real'-version of field \alib{files::ScanParameters;StartPath}
229/// of the given \p{parameters} struct. Further path/node pairs are created when
230/// symbolic links are found and followed.
231/// @param lock Pointer to an (optional) \alib{threads;SharedLock}.
232/// The overloaded version of this function that accepts \alib{SharedFTree}
233/// sets this to the instance found in the shared tree.<br>
234/// This parameter is available (and to be passed) only if the module
235/// \alib_threads is included in the \alibbuild.
236///
237/// @return Scan quality code of the tree node of the first resulting path, hence of the node
238/// referred to by \alib{files;ScanParameters::StartPath}. If this is erroneous,
239/// the start path was invalid, for example, not accessible, a broken link, a circular link,
240/// etc.
243 ScanParameters& parameters,
244 std::vector<ResultsPaths>& resultPaths,
245 SharedLock* lock );
246#else
249 ScanParameters& parameters,
250 std::vector<ResultsPaths>& resultPaths
251 IF_ALIB_THREADS( , SharedLock* lock) );
252#endif
253
254/// Invokes #ScanFiles( FTree&, ScanParameters&, std::vector<ResultsPaths>&,SharedLock*)
255/// passing the lock included in the given \alib{SharedFTree} as parameter \p{lock}.
256///
257/// @param tree The shared tree to fill.
258/// @param parameters The input parameters to determine the scan process.
259/// @param resultPaths The result paths.
260/// @return Scan quality code of the first resulting path.
261inline
263 ScanParameters& parameters,
264 std::vector<ResultsPaths>& resultPaths )
265{ return ScanFiles(*tree,parameters,resultPaths IF_ALIB_THREADS(, &tree.GetLock()) ); }
266
267} // namespace alib[::files]
268
269
270/// Type alias in namespace \b alib.
272
273/// Type alias in namespace \b alib.
275
276} // namespace [alib]
Qualities
Per-entry information about how a node was scanned.
Definition finfo.inl:123
TLock & GetLock() const noexcept
#define ALIB_DLL
Definition alib.inl:496
#define IF_ALIB_THREADS(...)
Definition alib.inl:401
#define ALIB_EXPORT
Definition alib.inl:488
ALIB_DLL FInfo::Qualities ScanFiles(FTree &tree, ScanParameters &parameters, std::vector< ResultsPaths > &resultPaths, SharedLock *lock)
String DBG_FILES_SCAN_VERBOSE_LOG_FORMAT
std::shared_ptr< FFilter > SPFileFilter
A shared pointer to a filter.
Definition ffilter.inl:44
strings::TString< PathCharType > PathString
The string-type used with this ALib Module.
Definition path.inl:33
files::ScanParameters ScanParameters
Type alias in namespace alib.
Definition fscanner.inl:271
threads::SharedLock SharedLock
Type alias in namespace alib.
files::TSharedFTree< SharedLock > SharedFTree
Type alias in namespace alib.
Definition ftree.inl:1029
system::Path Path
Type alias in namespace alib.
Definition path.inl:392
files::ResultsPaths ResultsPaths
Type alias in namespace alib.
Definition fscanner.inl:274
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2381
ResultsPaths(const system::PathString &realPath, FTree::Cursor node, bool existed)
Definition fscanner.inl:137
system::Path RealPath
The 'real' absolute path to the node (no symbolic links included).
Definition fscanner.inl:126
FTree::Cursor Node
The node in the tree representing this path.
Definition fscanner.inl:127
Input parameters to function ScanFiles.
Definition fscanner.inl:20
SPFileFilter DirectoryFilterPreRecursion
Definition fscanner.inl:99
static constexpr unsigned int InfiniteRecursion
Denotes 'infinite' recursion if set to field MaxDepth.
Definition fscanner.inl:34
SymbolicLinks LinkTreatment
Denotes how symbolic links are treated.
Definition fscanner.inl:40
unsigned int MaxDepth
The maximum recursion depth. Defaults to InfiniteRecursion.
Definition fscanner.inl:43
ScanParameters(const system::PathString &startPath, SymbolicLinks linkTreatment=SymbolicLinks::RECURSIVE, unsigned int maxDepth=InfiniteRecursion, bool crossFileSystems=true, bool includeArtificialFS=false)
Definition fscanner.inl:107
SymbolicLinks
Options for processing symbolic links.
Definition fscanner.inl:23
@ DONT_RESOLVE
Demands not to resolve symbolic links in any way.
Definition fscanner.inl:24
Path StartPath
The path to be scanned.
Definition fscanner.inl:37
SPFileFilter DirectoryFilterPostRecursion
Definition fscanner.inl:87