ALib C++ Library
Library Version: 2402 R1
Documentation generated by doxygen
Loading...
Searching...
No Matches
fscanner.hpp
Go to the documentation of this file.
1/** ************************************************************************************************
2 * \file
3 * This header file is part of module \alib_files of the \aliblong.
4 *
5 * \emoji :copyright: 2013-2024 A-Worx GmbH, Germany.
6 * Published under \ref mainpage_license "Boost Software License".
7 **************************************************************************************************/
8#ifndef HPP_ALIB_CAMP_FILE_FSCANNER
9#define HPP_ALIB_CAMP_FILE_FSCANNER 1
10
11#if !defined(HPP_ALIB_FILES_FTREE)
12# include "alib/files/ftree.hpp"
13#endif
14
15#if !defined(HPP_ALIB_FILES_FFILTER)
16# include "alib/files/ffilter.hpp"
17#endif
18namespace alib { namespace files {
19
20
21/** Input parameters to function #ScanFiles. */
23{
24 /** Options for processing symbolic links. */
25 enum class SymbolicLinks
26 {
27 DONT_RESOLVE = 0, ///< Demands \b not to resolve symbolic links in any way.
28 RESOLVE_BUT_DONT_FOLLOW = 1, ///< Demands to read symbolic links, but not follow linked directories.
29 ///< FInfo dates, sizes and access rights are set according to
30 ///< the link target.
31 RECURSIVE = 2, ///< Read symbolic links and in case they are targeting a
32 ///< directory, recurse into, if this directory meets the
33 ///< other constraints associated with the current scan.
34 };
35
36 /** Denotes 'infinite' recursion if set to field #MaxDepth. */
37 static constexpr unsigned int InfiniteRecursion = (std::numeric_limits<unsigned int>::max)();
38
39 /** The path to be scanned. */
41
42 /** Denotes how symbolic links are treated.*/
44
45 /** The maximum recursion depth. Defaults to #InfiniteRecursion.*/
47
48 /** If \c true, the default, scanning does not stop recursion on directories which represent
49 * a mounted filesystem. If \c false, the search is restricted to the device that #StartPath
50 * resides in. */
51 bool CrossFileSystems = true;
52
53 /** If \c false (the default), scanning aborts if \e 'artificial' filesystems are found.
54 * Artificial filesystems under GNU/Linux, are for example:
55 * <c>/proc</c>, <c>/dev</c>, <c>/run</c>, <c>/sys</c> and <c>/temp</c>. */
56 bool IncludeArtificialFS = false;
57
58 /** If \c false, empty directories remain in the result tree. Otherwise they are deleted
59 * and do not appear in the tree. */
61
62 /** If set (not containing \c nullptr), files are passed to this filter and removed if \c false
63 * is returned.<br>
64 * The term "files" here means all sorts of files except Directories.
65 * Directories are either real directories, or in case field #LinkTreatment is set to
66 * \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, symbolic links that
67 * target a directory.
68 *
69 * \see Optional filters #DirectoryFilterPreRecursion and #DirectoryFilterPostRecursion. */
71
72 /** If set (not containing \c nullptr), this filter is invoked \b after a recursive scan of
73 * a directory. If \c false is returned, the recursion is not performed, but the (empty)
74 * directory remains in the result list, if field #RemoveEmptyDirectories evaluates to
75 * \c false.<br>
76 * Note that in case field #LinkTreatment is set to
77 * \alib{files::ScanParameters;SymbolicLinks::RECURSIVE}, this filter
78 * is also applied to symbolic links, which are readable, not broken and target a directory.
79 *
80 * \note
81 * Directories (and symbolic links to directories) are first recursively scanned before this
82 * filter is applied. On deletion, of course, the whole scanned subtree is deleted.
83 * This allows filtering directories depending on information available only after
84 * scanning, hence by the numbers retrieved with \alib{files;FInfo::Sums}.
85 * To increase performance and filter directories \e prior to their recursive scan,
86 * alternative field #DirectoryFilterPreRecursion is to be used.
87 *
88 * \see Optional filters #DirectoryFilterPreRecursion and #FileFilter.
89 * */
91
92 /** Same as #DirectoryFilterPostRecursion but is used \b prior to a recursive scan of
93 * a directory. Consequently, this filter leads to much higher scan performance than the
94 * alternative version, because huge branches of the file system might be omitted during scan.
95 * However, the numbers retrieved with \alib{files;FInfo::Sums} will all indicate
96 * \c 0, because no information is retrieved.<br>
97 * If a directory is "pruned" due to this filter, the entry still occurs in the \b %FTree,
98 * unless field #RemoveEmptyDirectories evaluates to \c true.<br>
99 *
100 * \see Optional filters #DirectoryFilterPostRecursion and #FileFilter.
101 * */
103
104 /**
105 * Constructor accepting all features.
106 * @param startPath Stored in field #StartPath.
107 * @param linkTreatment Stored in field #LinkTreatment. Defaults to \b SymbolicLinks::RECURSIVE.
108 * @param maxDepth Stored in field #MaxDepth. Defaults to #InfiniteRecursion.
109 * @param crossFileSystems Stored in field #CrossFileSystems. Defaults to \c true.
110 * @param includeArtificialFS Stored in field #IncludeArtificialFS. Defaults to \c false.
111 */
112 ScanParameters( const String& startPath,
114 unsigned int maxDepth = InfiniteRecursion,
115 bool crossFileSystems = true,
116 bool includeArtificialFS = false )
117 : StartPath (startPath )
118 , LinkTreatment (linkTreatment )
119 , MaxDepth (maxDepth )
120 , CrossFileSystems (crossFileSystems )
121 , IncludeArtificialFS(includeArtificialFS)
122 {}
123
124}; // struct ScanParameters
125
126/**
127 * A simple triple of a path string, a corresponding \b FTree node and a boolean to indicate
128 * whether the path existed already. This struct is used as an output parameter of function
129 * #ScanFiles.
130 */
132{
133 AString RealPath; ///< The 'real' absolute path to the node (no symbolic links included).
134 FTree::Cursor Node; ///< The node in the tree representing this path.
135 bool Existed; ///< If \c true, the complete path existed already in the target
136 ///< \alib{files;FTree}. However, it might have existed as
137 ///< another result path, hence not in a recursively scanned fashion, and
138 ///< now is rescanned with 'higher' scan quality.
139
140 /**
141 * Constructor.
142 * @param realPath Stored in field #RealPath.
143 * @param node Stored in field #Node.
144 * @param existed Stored in field #Existed.
145 */
146 ResultsPaths(const String& realPath, FTree::Cursor node, bool existed )
147 : RealPath(realPath)
148 , Node (node)
149 , Existed (existed)
150 {}
151};
152
153/**
154 * ### General Information ###
155 * Scans the filesystem according to given \b ScanParameters and adds \alib{files;FInfo}
156 * entries to the given \alib{files;FTree}.
157 *
158 * ### ALib %FTree Data Contract ###
159 * This function has a contract with class \alib{files;FTree} that is used to store the scan results.
160 * This contract states, that any file or directory found during scan is always stored using
161 * the <em>"Real Path"</em> of the entry. This means, any symbolic link is resolved.
162 * The consequences are:
163 * - %Files and directories which represent a symbolic link are always "leaf nodes".
164 * (They never contain child nodes.). However, their symlink target path is attached twice
165 * to the entry:
166 * 1. The original link information given, which often uses relative path addressing.
167 * 2. The absolute, <em>"Real Path"</em> of the target, which has a corresponding result entry
168 * in the given \b %FTree.
169 * - If a using software wants to use symbolic paths, for example to present them to the end
170 * user, such paths have to be assembled by the user's code in own responsibility.
171 * All information for doing this is provided in the resulting tree object.
172 * - Doubly linked target files and directories are never a problem for this scanner. Each
173 * file is scanned only once. This especially prevents all sorts of problems that would otherwise
174 * occur with cyclic symbolic links.
175 * - Due to this, even the given start path of a search might not be found as a result
176 * in given \b %FTree, because also start paths are converted to a <em>Real Path</em>.
177 * - The scan result may contain more than one resulting path. This happens, if a symbolic link
178 * targets a file or directory that is not recursively included in the start path.
179 * The resulting <em>"Real Path"</em> of the given start path is however always the first
180 * result added.
181 *
182 * The latter is reflected with parameter \p{resultPaths} of this function, which is defined
183 * as a <c>std::vector</c>.
184 *
185 * \note
186 * As class \alib{files;FTree} is based on class \alib{monomem::StringTree}, using code
187 * is enabled to break this contract by adding entries below symbolic links.
188 * Other entities of this \alibmod_nl will not break this contract.
189 *
190 * ### Rescanning of Entries ###
191 * Existing entries in the given \p{tree} are not overwritten. They might be scanned with "higher"
192 * \alib{files;FInfo::Qualities} values, depending on the given \p{parameters} and how they had been scanned
193 * before. If the same "level" of scanning is provided, existing entries will not be scanned again.
194 * If a rescan of a certain path is wanted, then the target entry of that path has to be deleted
195 * prior to invoking this function. Due to the implementation of class FTree, repeated delete
196 * and scan operations will not cause any heap-memory activities (of course, as long as no
197 * new entries are detected).
198 *
199 * ### Platform Dependent Code Selection ###
200 * File scanning is a platform-dependent task and hence \b ALib provides two
201 * implementations:
202 * 1. A posix version for posix compatible OSes,
203 * 2. A version that relies on <c>C++ std::filesystem</c>.
204 *
205 * The fallback version using <c>std::filesystem</c> has the following restrictions:
206 * - The only time available is the \alib{files;FInfo::MTime;modification time} of
207 * an entry. The \alib{files;FInfo::MTime;creation time} and
208 * \alib{files;FInfo::MTime;last access time} are always set to the same as the
209 * modification time, even on filesystems that support the other values.
210 * - The file time of symbolic links is \b always that of the target file. The C++ standard has
211 * no possibility to access the link's time itself.
212 * - The file time of broken symbolic links is set to the current time (time of scanning).
213 * - The size that directories occupy on disk can not be determined. Directory entries always report
214 * size <c>0</c>.
215 * - The target of a symbolic link that points to a non-accessible directory cannot be resolved to
216 * a 'real' path, even if all other path components before are accessible.
217 * (This is true for the implementation of the standard library under GNU/Linux and Clang compiler
218 * at the time of writing this, 2024/02.)
219 * - Flag \alib{files;ScanParameters::CrossFileSystems} is ignored. Crossing Filesystems can not
220 * be detected using purely standard library.
221 * - A file's owner and owning group are not determined. Instead, \alib{files;FInfo::UnknownID} is
222 * set for both.
223 * - The scanning process is half as fast as in the Posix version. The reason for this is probably
224 * the internal allocation and de-allocation of many quite volatile string objects in the C++
225 * standard library.
226 * Well, but it is still fast though!
227 *
228 * \note As of today, using this module under WindowsOS will fall back to the
229 * <em>C++ std::filesystem</em> version. It may be that a future version will provide a native
230 * implementation for this target system. Volunteers from the community are welcome to
231 * contribute.
232 *
233 * @param tree The tree to fill.
234 * @param parameters The input parameters to determine the scan process.
235 * @param resultPaths A container to add the resulting list of 'real' paths and corresponding start
236 * nodes found during the search. The first entry added by this function is
237 * always the 'real'-version of field \alib{files::ScanParameters;StartPath}
238 * of the given \p{parameters} struct. Further paths/nodes pairs are created when
239 * symbolic links are found and followed.
240 *
241 * @return Scan quality code of the tree node of the first resulting path, hence of the node
242 * referred to by \alib{files;ScanParameters::StartPath}. If this is erroneous,
243 * the start path was invalid, for example not accessible, a broken link, a circular link,
244 * etc.
245 */
248 ScanParameters& parameters,
249 std::vector<ResultsPaths>& resultPaths );
250} // namespace alib[::files]
251
252
253/// Type alias in namespace \b alib.
255
256/// Type alias in namespace \b alib.
258
259} // namespace [alib]
260
261
262#endif // HPP_ALIB_CAMP_FILE_FSCANNER
#define ALIB_API
Definition alib.hpp:538
std::shared_ptr< FFilter > SPFileFilter
Definition ffilter.hpp:54
ALIB_API enum FInfo::Qualities ScanFiles(FTree &tree, ScanParameters &parameters, std::vector< ResultsPaths > &resultPaths)
Definition alib.cpp:57
AString RealPath
The 'real' absolute path to the node (no symbolic links included).
Definition fscanner.hpp:133
FTree::Cursor Node
The node in the tree representing this path.
Definition fscanner.hpp:134
ResultsPaths(const String &realPath, FTree::Cursor node, bool existed)
Definition fscanner.hpp:146
ScanParameters(const String &startPath, SymbolicLinks linkTreatment=SymbolicLinks::RECURSIVE, unsigned int maxDepth=InfiniteRecursion, bool crossFileSystems=true, bool includeArtificialFS=false)
Definition fscanner.hpp:112
SymbolicLinks LinkTreatment
Definition fscanner.hpp:43
static constexpr unsigned int InfiniteRecursion
Definition fscanner.hpp:37
SPFileFilter DirectoryFilterPostRecursion
Definition fscanner.hpp:90
SPFileFilter DirectoryFilterPreRecursion
Definition fscanner.hpp:102
@ DONT_RESOLVE
Demands not to resolve symbolic links in any way.