ALib C++ Library
Library Version: 2412 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
fscanner.hpp
Go to the documentation of this file.
1//==================================================================================================
2/// \file
3/// This header file is part of module \alib_files of the \aliblong.
4///
5/// \emoji :copyright: 2013-2024 A-Worx GmbH, Germany.
6/// Published under \ref mainpage_license "Boost Software License".
7//==================================================================================================
8#ifndef HPP_ALIB_CAMP_FILE_FSCANNER
9#define HPP_ALIB_CAMP_FILE_FSCANNER 1
10#pragma once
11#include "alib/files/ftree.hpp"
13
14
15// =================================================================================================
16// === Set preprocessor symbol ALIB_FILES_SCANNER_IMPL
17// =================================================================================================
18#if !defined(ALIB_FILES_FORCE_STD_SCANNER)
19# define ALIB_FILES_FORCE_STD_SCANNER 0
20#endif
21
22#if defined(ALIB_FILES_SCANNER_IMPL)
23# error "Symbol ALIB_FILES_SCANNER_IMPL must not be set from outside! Use ALIB_FILES_FORCE_STD_SCANNER."
24#endif
25
26#define ALIB_FILES_SCANNER_STDFS 1
27#define ALIB_FILES_SCANNER_POSIX 2
28
29#if ( ( defined(__GLIBCXX__) && !defined(__MINGW32__) ) \
30 || defined(__APPLE__) \
31 || defined(__ANDROID_NDK__) ) && !ALIB_FILES_FORCE_STD_SCANNER
32# define ALIB_FILES_SCANNER_IMPL ALIB_FILES_SCANNER_POSIX
33#else
34# define ALIB_FILES_SCANNER_IMPL ALIB_FILES_SCANNER_STDFS
35#endif
36
37
38
39namespace alib { namespace files {
40
41#if ALIB_DEBUG
42/// The format string used with verbose logging to domain <c>/ALIB/FILES/SCAN</c> during
43/// scans performed with namespace function \alib{files;ScanFiles}.<br>
44/// Defaults to <c>" {:ta h{2,r} on{10,r} gn{10,r} s(IEC){10,r} dm qqq nf l}"</c>
46#endif
47
48
49/// Input parameters to function #ScanFiles.
51{
52 /// Options for processing symbolic links.
53 enum class SymbolicLinks
54 {
55 DONT_RESOLVE = 0, ///< Demands \b not to resolve symbolic links in any way.
56 RESOLVE_BUT_DONT_FOLLOW = 1, ///< Demands to read symbolic links, but not follow linked directories.
57 ///< FInfo dates, sizes and access rights are set according to
58 ///< the link target.
59 RECURSIVE = 2, ///< Read symbolic links and in case they are targeting a
60 ///< directory, recurse into, if this directory meets the
61 ///< other constraints associated with the current scan.
62 };
63
64 /// Denotes 'infinite' recursion if set to field #MaxDepth.
65 static constexpr unsigned int InfiniteRecursion = (std::numeric_limits<unsigned int>::max)();
66
67 /// The path to be scanned.
69
70 /// Denotes how symbolic links are treated.
72
73 /// The maximum recursion depth. Defaults to #InfiniteRecursion.
75
76 /// If \c true, the default, scanning does not stop recursion on directories which represent
77 /// a mounted filesystem. If \c false, the search is restricted to the device that #StartPath
78 /// resides in.
79 bool CrossFileSystems = true;
80
81 /// If \c false (the default), scanning aborts if \e 'artificial' filesystems are found.
82 /// Artificial filesystems under GNU/Linux, are for example:
83 /// <c>/proc</c>, <c>/dev</c>, <c>/run</c>, <c>/sys</c> and <c>/temp</c>.
84 bool IncludeArtificialFS = false;
85
86 /// If \c false, empty directories remain in the result tree. Otherwise they are deleted
87 /// and do not appear in the tree.
89
90 /// If set (not containing \c nullptr), files are passed to this filter and removed if \c false
91 /// is returned.<br>
92 /// The term "files" here means all sort of files except Directories.
93 /// Directories are either real directories, or in case field #LinkTreatment is set to
94 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, symbolic links that
95 /// target a directory.
96 ///
97 /// \see Optional filters #DirectoryFilterPreRecursion and #DirectoryFilterPostRecursion.
99
100 /// If set (not containing \c nullptr), this filter is invoked \b after a recursive scan of
101 /// a directory. If \c false is returned, the scanned subtree is discarded, but the (empty)
102 /// directory remains in the result list, if field #RemoveEmptyDirectories evaluates to
103 /// \c false.<br>
104 /// Note that in case field #LinkTreatment is set to
105 /// \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, this filter
106 /// is also applied to symbolic links, which are readable, not broken and target a directory.
107 ///
108 /// \note
109 /// Directories (and symbolic links to directories) are first recursively scanned before this
110 /// filter is applied. On deletion, of course the whole scanned subtree is deleted.
111 /// This allows filtering directories, depending on information available only after
112 /// scanning, hence by the numbers retrieved with \alib{files;FInfo::Sums}.
113 /// To increase performance and filter directories \e before their recursive scan,
114 /// alternative field #DirectoryFilterPreRecursion is to be used.
115 ///
116 /// \see Optional filters #DirectoryFilterPreRecursion and #FileFilter.
117 ///
119
120 /// Same as #DirectoryFilterPostRecursion but is used \b before a recursive scan of
121 /// a directory. Consequently, this filter leads to much higher scan performance than the
122 /// alternative version, because huge branches of the file system might be omitted during scan.
123 /// However, the numbers retrieved with \alib{files;FInfo::Sums} will all indicate
124 /// \c 0, because no information is retrieved.<br>
125 /// If a directory is "pruned" due to this filter, the entry still occurs in the \b %FTree,
126 /// unless field #RemoveEmptyDirectories evaluates to \c true.<br>
127 ///
128 /// \see Optional filters #DirectoryFilterPostRecursion and #FileFilter.
129 ///
131
132 /// Constructor accepting all features.
133 /// @param startPath Stored in field #StartPath.
134 /// @param linkTreatment Stored in field #LinkTreatment. Defaults to \b SymbolicLinks::RECURSIVE.
135 /// @param maxDepth Stored in field #MaxDepth. Defaults to #InfiniteRecursion.
136 /// @param crossFileSystems Stored in field #CrossFileSystems. Defaults to \c true.
137 /// @param includeArtificialFS Stored in field #IncludeArtificialFS. Defaults to \c false.
140 unsigned int maxDepth = InfiniteRecursion,
141 bool crossFileSystems = true,
142 bool includeArtificialFS = false )
143 : StartPath (startPath )
144 , LinkTreatment (linkTreatment )
145 , MaxDepth (maxDepth )
146 , CrossFileSystems (crossFileSystems )
147 , IncludeArtificialFS(includeArtificialFS)
148 {}
149
150}; // struct ScanParameters
151
152/// A simple triple of a path string, a corresponding \b FTree node and a boolean to indicate
153/// whether the path existed already. This struct is used as an output parameter of function
154/// #ScanFiles.
156{
157 lang::system::Path RealPath; ///< The 'real' absolute path to the node (no symbolic links included).
158 FTree::Cursor Node; ///< The node in the tree representing this path.
159 bool Existed; ///< If \c true, the complete path existed already in the target
160 ///< \alib{files;FTree}. However, it might have existed as
161 ///< another result path, hence not in recursively scanned fashion, and
162 ///< now is rescanned with 'higher' scan quality.
163
164 /// Constructor.
165 /// @param realPath Stored in field #RealPath.
166 /// @param node Stored in field #Node.
167 /// @param existed Stored in field #Existed.
168 ResultsPaths(const lang::system::PathString& realPath, FTree::Cursor node, bool existed )
169 : RealPath(realPath)
170 , Node (node)
171 , Existed (existed)
172 {}
173};
174
175#if DOXYGEN
176/// ### General Information ###
177/// Scans the filesystem according to given \b ScanParameters and adds \alib{files;FInfo}
178/// entries to the given \alib{files;FTree}.
179///
180/// ### ALib FTree Data Contract ###
181/// This function has a contract with class \alib{files;FTree} that is used to store the scan results.
182/// This contract states that any file or directory found during a scan is always stored using
183/// the <em>"Real Path"</em> of the entry. This means that any symbolic link is resolved.
184/// The consequences are:
185/// - %Files and directories which represent a symbolic link are always "leaf nodes".
186/// (They never contain child nodes.) However, their symlink target path is attached twice
187/// to the entry:
188/// 1. The original link information given, which often uses relative path addressing.
189/// 2. The absolute, <em>"Real Path"</em> of the target, which has a corresponding result entry
190/// in the given \b %FTree.
191/// - If client software wants to use symbolic paths, for example, to present them to the end
192/// user, such paths have to be assembled by the client's code on its own responsibility.
193/// All information needed for doing this is provided in the resulting tree object.
194/// - Doubly linked target files and directories are never a problem for this scanner. Each
195/// file is scanned only once. This especially prevents all sorts of problems that would otherwise
196/// occur with cyclic symbolic links.
197/// - Due to this, even the given start path of a search might not be found as a result
198/// in given \b %FTree, because also start paths are converted to a <em>Real Path</em>.
199/// - The scan result may contain more than one resulting path. This happens, if a symbolic link
200/// targets a file or directory that is not recursively included in the start path.
201/// The resulting <em>"Real Path"</em> of the given start path is however always the first
202/// result added.
203///
204/// The latter is reflected with parameter \p{resultPaths} of this function, which is defined
205/// as a <c>std::vector</c>.
206///
207/// \note
208/// As class \alib{files;FTree} is based on class \alib{containers;StringTree}, using code
209/// is enabled to break this contract by adding entries below symbolic links.
210/// Other entities of this \alibmod_nl will not break this contract.
211///
212/// ### Rescanning of Entries ###
213/// Existing entries in the given \p{tree} are not overwritten. They might be scanned with "higher"
214/// \alib{files;FInfo::Qualities} values, depending on given \p{parameters} and how they had been
215/// scanned before. If the same "level" of scanning is provided, existing entries will not be
216/// scanned again. If a rescan of a certain path is wanted, then the target entry of that path has
217/// to be deleted before invoking this function. Due to the implementation of class FTree, repeated
218/// delete and scan operations will not cause any heap-memory activities (of course, as long as no
219/// new entries are detected).
220///
221/// ### Platform-Dependent Code Selection ###
222/// File scanning is a platform-dependent task and hence \b ALib uses one of two different
223/// implementations:
224/// 1. A posix version for posix compatible OSes,
225/// 2. A version that relies on <c>C++ std::filesystem</c>.
226///
227/// The fallback version using <c>std::filesystem</c> has the following restrictions:
228/// - The only time attribute available is the \alib{files;FInfo::MDate;modification time} of
229/// an entry. Fields \alib{files::FInfo;BDate}, \alib{files::FInfo;CDate}, and
230/// \alib{files::FInfo;ADate} are always set to the same as the
231/// modification time, even on filesystems that support the other values.
232/// - The file time of symbolic links is \b always that of the target file. The C++ standard has
233/// no possibility to access the link's time itself.
234/// - The file time of broken symbolic links is set to the current time (time of scanning).
235/// - The size that directories occupy on disk cannot be determined. Directory entries always report
236/// size <c>0</c>.
237/// - The target of a symbolic link which points to a non-accessible directory, cannot be resolved
238/// to a 'real' path, even if all other path components before are accessible.
239/// (This is true for the implementation of the standard library under GNU/Linux and Clang compiler
240/// at the time of writing this, 2024/02.)
241/// - Flag \alib{files;ScanParameters::CrossFileSystems} is ignored. Crossing Filesystems cannot
242/// be detected using purely the standard library.
243/// - A file's owner and owning group are not determined. Instead, \alib{files;FInfo::UnknownID} is
244/// set for both.
245/// - The scanning process is half as fast as in the Posix version. The reason for this is probably
246/// the internal allocation and deallocation of many quite volatile string objects in the C++
247/// standard library.
248/// Well, but it is still fast though!
249///
250/// \note As of today, using this module under WindowsOS will fall back to the
251/// <em>C++ std::filesystem</em> version. It may be that a future version will provide a native
252/// implementation for this target system. Volunteers from the community are welcome to
253/// contribute.
254///
255/// @param tree The tree to fill.
256/// @param parameters The input parameters to determine the scan process.
257/// @param resultPaths A container to add the resulting list of 'real' paths and corresponding start
258/// nodes found during the search. The first entry added by this function is
259/// always the 'real'-version of field \alib{files::ScanParameters;StartPath}
260/// of the given \p{parameters} struct. Further path/node pairs are created when
261/// symbolic links are found and followed.
262/// @param lock Pointer to an (optional) \alib{threads;SharedLock}.
263/// The overloaded version of this function that accepts \alib{SharedFTree}
264/// sets this to the instance found in the shared tree.<br>
265/// This parameter is available (and to be passed) only if the module
266/// \alib_threads is included in the \alibdist.
267///
268/// @return Scan quality code of the tree node of the first resulting path, hence of the node
269/// referred to by \alib{files;ScanParameters::StartPath}. If this is erroneous,
270/// the start path was invalid, for example, not accessible, a broken link, a circular link,
271/// etc.
274 ScanParameters& parameters,
275 std::vector<ResultsPaths>& resultPaths,
276 SharedLock* lock );
277#else
280 ScanParameters& parameters,
281 std::vector<ResultsPaths>& resultPaths
282 IF_ALIB_THREADS( , SharedLock* lock) );
283#endif
284
285/// Invokes #ScanFiles( FTree&, ScanParameters&, std::vector<ResultsPaths>&,SharedLock*)
286/// passing the lock included in the given \alib{SharedFTree} as parameter \p{lock}.
287///
288/// @param tree The shared tree to fill.
289/// @param parameters The input parameters to determine the scan process.
290/// @param resultPaths The result paths.
291/// @return Scan quality code of the first resulting path.
292inline
294 ScanParameters& parameters,
295 std::vector<ResultsPaths>& resultPaths )
296{ return ScanFiles(*tree,parameters,resultPaths IF_ALIB_THREADS(, &tree.GetLock()) ); }
297
298} // namespace alib[::files]
299
300
301/// Type alias in namespace \b alib.
303
304/// Type alias in namespace \b alib.
306
307} // namespace [alib]
308
309
310#endif // HPP_ALIB_CAMP_FILE_FSCANNER
311
Qualities
Per-entry information about how a node was scanned.
Definition finfo.hpp:134
TLock & GetLock() const noexcept
#define IF_ALIB_THREADS(...)
Definition alib.hpp:352
#define ALIB_API
Definition alib.hpp:639
String DBG_FILES_SCAN_VERBOSE_LOG_FORMAT
std::shared_ptr< FFilter > SPFileFilter
A shared pointer to a filter.
Definition ffilter.hpp:49
ALIB_API FInfo::Qualities ScanFiles(FTree &tree, ScanParameters &parameters, std::vector< ResultsPaths > &resultPaths, SharedLock *lock)
Definition alib.cpp:69
strings::TString< character > String
Type alias in namespace alib.
FTree::Cursor Node
The node in the tree representing this path.
Definition fscanner.hpp:158
ResultsPaths(const lang::system::PathString &realPath, FTree::Cursor node, bool existed)
Definition fscanner.hpp:168
lang::system::Path RealPath
The 'real' absolute path to the node (no symbolic links included).
Definition fscanner.hpp:157
Input parameters to function ScanFiles.
Definition fscanner.hpp:51
Path StartPath
The path to be scanned.
Definition fscanner.hpp:68
SymbolicLinks LinkTreatment
Denotes how symbolic links are treated.
Definition fscanner.hpp:71
unsigned int MaxDepth
The maximum recursion depth. Defaults to InfiniteRecursion.
Definition fscanner.hpp:74
static constexpr unsigned int InfiniteRecursion
Denotes 'infinite' recursion if set to field MaxDepth.
Definition fscanner.hpp:65
SPFileFilter DirectoryFilterPostRecursion
Definition fscanner.hpp:118
ScanParameters(const lang::system::PathString &startPath, SymbolicLinks linkTreatment=SymbolicLinks::RECURSIVE, unsigned int maxDepth=InfiniteRecursion, bool crossFileSystems=true, bool includeArtificialFS=false)
Definition fscanner.hpp:138
SPFileFilter DirectoryFilterPreRecursion
Definition fscanner.hpp:130
SymbolicLinks
Options for processing symbolic links.
Definition fscanner.hpp:54
@ DONT_RESOLVE
Demands not to resolve symbolic links in any way.