ALib C++ Library
Library Version: 2510 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
token.cpp
1// #################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6// #################################################################################################
7#include "alib_precompile.hpp"
8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Symbol ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
10#endif
11#if ALIB_C20_MODULES
12 module;
13#endif
14// ====================================== Global Fragment ======================================
16// =========================================== Module ==========================================
17#if ALIB_C20_MODULES
18 module ALib.Strings.Token;
20 import ALib.EnumOps;
21# if ALIB_RESOURCES
22 import ALib.Resources;
23# endif
24# if ALIB_ENUMRECORDS
25 import ALib.EnumRecords;
26# endif
27#else
28# include "ALib.Strings.Token.H"
30# include "ALib.EnumOps.H"
31# include "ALib.EnumRecords.H"
32# include "ALib.Resources.H"
33#endif
34// ====================================== Implementation =======================================
35// Windows.h might bring in max/min macros
36#if defined( max )
37 #undef max
38 #undef min
39#endif
40
41namespace alib::strings::util {
42
// Constructs a token from a name, a letter case sensitivity, a single minimum length and
// an export name.
//   pName        Stored as the definition name of the token.
//   sensitivity  Any value other than lang::Case::Sensitive adds the ignoreCase flag to
//                the format, which otherwise is Formats::Normal.
//   minLength    Stored as the first (index 0) minimum length value.
//   pExportName  Stored as the export name of the token.
// NOTE(review): this numbered listing skips its lines 52 and 55, hence the statements that
// the two debug-only `if`s below control are not visible here. Restore them from the
// original token.cpp before compiling or reasoning about the debug behavior.
43Token::Token(const String& pName, lang::Case sensitivity, int8_t minLength, const String& pExportName)
44: definitionName (pName)
45, exportName (pExportName)
46, format(Formats( int8_t(Formats::Normal )
47 + int8_t(sensitivity == lang::Case::Sensitive ? Formats(0) : ignoreCase) ) )
48{
49 minLengths[0]= minLength;
50#if ALIB_DEBUG
// debug builds only: minimum length is negative or longer than the definition name
51 if( minLength < 0 || minLength > definitionName.Length() )
53
// debug builds only: a minimum length of zero
54 if( minLength == 0 )
56#endif
57
58}
59
// Constructs a token from a definition name, a letter case sensitivity and seven
// minimum length values.
//   definitionSrc            Stored as the definition name of the token.
//   sensitivity              If lang::Case::Ignore (and the format value is not negative),
//                            the ignoreCase flag is or'ed into the format.
//   minLength1..minLength7   Copied, in this order, into the array of minimum lengths.
// NOTE(review): this numbered listing skips its line 66 (between the opening brace and the
// `if`). Nothing visible in this constructor assigns `format` before it is tested, so
// presumably the missing line establishes it (e.g., by detecting the format) - confirm
// against the original token.cpp.
60Token::Token( const String& definitionSrc, lang::Case sensitivity,
61 int8_t minLength1, int8_t minLength2, int8_t minLength3, int8_t minLength4, int8_t minLength5,
62 int8_t minLength6, int8_t minLength7 )
63: definitionName(definitionSrc)
64, minLengths { minLength1, minLength2, minLength3, minLength4, minLength5, minLength6, minLength7 }
65{
// a negative format value is left untouched; otherwise add the ignore-case flag on request
67 if( int(format) >= 0 && sensitivity == lang::Case::Ignore )
68 format= Formats( int8_t(format) | int8_t(ignoreCase) );
69}
70
71
// Appends the name that represents this token on output to `target`.
//   target  The string buffer to append to. Existing content is kept.
// If an explicit export name is set, it is appended and nothing else happens. Otherwise
// the definition name is appended and afterwards the last character of `target` may be
// converted to lower case (see below).
72void Token::GetExportName(AString& target) const
73{
// an explicitly given export name always wins
74 if( exportName.IsNotEmpty() )
75 {
76 target << exportName;
77 return;
78 }
79
80 target << GetDefinitionName();
81
82 // lower-case the last character if CamelCase and the last min length equals 0.
// NOTE(review): this numbered listing skips its line 83, which presumably holds the
// condition (CamelCase, as stated above) guarding the following brace block. As visible
// here, the block is unconditional - confirm against the original token.cpp.
84 {
// Scans the minimum lengths in order: the first value equal to 0 triggers the lower-case
// conversion of the last character of target; a value of -1 ends the scan without it.
85 for( int i= 0 ; i < 7 ; ++i )
86 {
87 auto minLen= GetMinLength( i );
88 if( minLen == 0 )
89 {
90 target[target.Length()-1]= characters::ToLower(target[target.Length()-1]);
91 break;
92 }
93 if( minLen == -1 )
94 break;
95 }
96 }
97}
98
99#if ALIB_ENUMRECORDS
// Defines this token by parsing a definition string whose fields are divided by
// `separator`.
//   definitionSrc  The definition string to parse.
//   separator      The character separating the fields of the definition.
// Fields as read by the visible code:
//   1. the name (trimmed); parsing stops silently if it is empty,
//   2. optionally the letter case sensitivity, read with enumrecords::Parse,
//   3. optionally further fields: those starting with a digit are read as minimum length
//      values (at most 7), a field not starting with a digit is taken as the export name,
//      which may be given only once.
// NOTE(review): this numbered listing skips its lines 104, 113, 120, 129, 138, 142, 146,
// 166 and 179. Consequently, the declaration of `letterCase`, the statement that reads the
// export name, the statements preceding each early `return` (presumably recording an
// error state) and the statement controlled by the last `if` are not visible here.
// Restore them from the original token.cpp before compiling.
100void Token::Define( const String& definitionSrc, character separator )
101{
// start with one minimum length of 0, followed by the end-marker -1
102 minLengths[0]= 0;
103 minLengths[1]= -1;
105
106 Substring parser(definitionSrc);
107
108 // name
109 definitionName = Substring( parser.ConsumeToken( separator ) ).Trim();
110 if( definitionName.IsEmpty() )
111 return;
112
114 size_t qtyMinLengths= 0;
115 if(parser.IsNotEmpty() )
116 {
117 // letter case sensitivity
118 if( !enumrecords::Parse( parser, letterCase ) )
119 {
121 return;
122 }
123
124 // list of minimum length values
125 while(parser.ConsumeChar( separator ) )
126 {
// an eighth field after the sensitivity is not accepted
127 if( qtyMinLengths >= 7 )
128 {
130 return;
131 }
132
133 if( !isdigit(parser.CharAtStart()) )
134 {
135 // optionally read export name once
// a non-nulled export name means that one was read before: stop parsing
136 if( exportName.IsNotNull() )
137 {
139 return;
140 }
141
143
// presumably the export name was read on the skipped line above; an empty result stops parsing
144 if( exportName.IsEmpty() )
145 {
147 return;
148 }
149
150 continue;
151 }
152
153 parser.ConsumeDecDigits( minLengths[qtyMinLengths++] );
154 }
155 }
156
// no minimum length given: the whole definition name has to be matched
157 if( qtyMinLengths == 0 )
158 minLengths[0]= int8_t( definitionName.Length() );
159
// fewer than 7 values given: terminate the list with the end-marker -1
160 if( qtyMinLengths > 0 && qtyMinLengths < 7 )
161 minLengths[qtyMinLengths]= -1;
162
// debug builds only: input that remained unparsed stops the definition here
163 #if ALIB_DEBUG
164 if( parser.IsNotEmpty() )
165 {
167 return;
168 }
169 #endif
170
171 detectFormat();
172
// debug builds only: a negative format value stops the definition here
173 #if ALIB_DEBUG
174 if( int(format) < 0 )
175 return;
176 #endif
177
// NOTE(review): the statement controlled by this `if` is on the skipped line 179;
// presumably it adds the ignoreCase flag to the format - confirm.
178 if( letterCase == lang::Case::Ignore )
180}
181#endif //ALIB_ENUMRECORDS
182
184{
// Body of the format detection: decides between snake_case, kebab-case, CamelCase and
// "normal" by inspecting the definition name and the number of minimum length values and,
// in debug builds, validates the minimum lengths against the segments of the name.
// NOTE(review): this numbered listing skips its line 183, which holds the signature (the
// cross-reference section of the page names it `void detectFormat()`), as well as lines
// 191, 201, 218, 222, 229, 233, 240, 244, 251, 264, 269, 291, 303 and 306. Consequently the
// declaration of `c`, all assignments to `format`, the statements controlled by the debug
// checks and a part of the last condition are not visible here. Restore them from the
// original token.cpp before compiling.
185 // detect number of min length values
186 int qtyMinLength= 1;
187 while( qtyMinLength < 7 && minLengths[qtyMinLength] >= 0 )
188 ++qtyMinLength;
189
190 // just one length given? Keep format "normal"
192 if( qtyMinLength > 1 )
193 {
194 // count hyphens, underscores, camel humps...
// the first character is evaluated here, the loop below starts with the second one
195 bool hasLowerCases= isalpha(definitionName[0]) && islower(definitionName[0]);
196 int qtyUpperCases= 0;
197 int qtyUnderscores= 0;
198 int qtyHyphens= 0;
199 for( integer idx= 1; idx < definitionName.Length() ; ++idx )
200 {
// presumably `c` is the character at position idx (declared on the skipped line 201)
202 if( c == '_' ) ++qtyUnderscores;
203 else if( c == '-' ) ++qtyHyphens;
204 else if( isalpha(c) )
205 {
206 if( islower(c) )
207 hasLowerCases= true;
208 else
209 ++qtyUpperCases;
210 }
// any other non-alphabetic character is treated like a lower case letter
211 else
212 hasLowerCases= true;
213 }
214
// The checks are prioritized: underscores first, then hyphens, then camel humps.
// In debug builds, each branch expects the number of minimum lengths to equal the number
// of delimiters (respectively upper case letters) plus one, capped at 7.
215 // Snake_Case?
216 if( qtyUnderscores > 0 )
217 {
219 #if ALIB_DEBUG
220 if( (qtyUnderscores >= 7 && qtyMinLength != 7 )
221 || (qtyUnderscores < 7 && qtyMinLength != qtyUnderscores + 1 ) )
223 #endif
224 }
225
226 // Kebab-Case?
227 else if( qtyHyphens > 0 )
228 {
230 #if ALIB_DEBUG
231 if( (qtyHyphens >= 7 && qtyMinLength != 7 )
232 || (qtyHyphens < 7 && qtyMinLength != qtyHyphens + 1 ) )
234 #endif
235 }
236
237 // CamelCase
238 else if( hasLowerCases && ( qtyUpperCases > 0 ) )
239 {
241 #if ALIB_DEBUG
242 if( (qtyUpperCases >= 7 && qtyMinLength != 7 )
243 || (qtyUpperCases < 7 && qtyMinLength != qtyUpperCases + 1 ) )
245 #endif
246 }
247
248 // normal
249 #if ALIB_DEBUG
250 else
252 #endif
253 }
254
255 // check segment sizes against minLengths
256 #if ALIB_DEBUG
// a negative format value ends the validation right away
257 if( int(format) < 0 )
258 return;
259
260 if( GetFormat() == Formats::Normal )
261 {
// a normal token has one segment: its minimum length must be in range 1..length of name
262 if( minLengths[0] > definitionName.Length() )
263 {
265 return;
266 }
267 if( minLengths[0] <= 0 )
268 {
270 return;
271 }
272 }
273 else
274 {
275 int segmentNo = 0;
276 int segmentLength= 0;
277 integer charIdx = 1;
// NOTE(review): the loop ends when charIdx reaches the length of the name, hence the
// segment that ends with the name itself is not compared with its minimum length by the
// visible code - confirm that this is intended.
278 while( charIdx < definitionName.Length() )
279 {
280 ++segmentLength;
281 character c= definitionName.CharAt( charIdx++ );
282 bool segmentEnd= c == '\0'
283 || (format == Formats::SnakeCase && c == '_' )
284 || (format == Formats::KebabCase && c == '-' )
285 || (format == Formats::CamelCase && isalpha(c) && isupper(c) );
286
287 if( segmentEnd )
288 {
// the minimum length of a segment must not exceed the length of the segment
289 if( segmentNo < 7 && minLengths[segmentNo] > segmentLength )
290 {
292 return;
293 }
294
// with CamelCase the next segment starts with length 1, presumably because the upper case
// letter that ended this segment is already the first character of the next one
295 segmentLength= (format == Formats::CamelCase ? 1 : 0);
296 ++segmentNo;
297 }
298 }
299
// inspects each given minimum length for the value 0; the condition continues on the
// skipped line 303 and is therefore incomplete in this listing
300 for( int minLenIdx= 0 ; minLenIdx < 7 && minLengths[minLenIdx] >= 0 ; ++minLenIdx )
301 {
302 if( minLengths[minLenIdx] == 0
304 || !( minLenIdx == 6 || minLengths[minLenIdx + 1] == -1 ) ) )
305 {
307 return;
308 }
309 }
310 }
311 #endif
312
313
314}
315
// Tests whether `needle` matches this token.
//   needle  The string to test. Must not be empty (asserted in debug builds).
//   returns true if the needle is accepted, false otherwise.
// The definition name (the "haystack", index hIdx) is walked character by character and
// compared with the needle (index nIdx). The haystack is divided into segments depending on
// the format: at '_' with snake_case, at '-' with kebab-case and in front of an upper case
// letter with CamelCase. For each segment, the corresponding entry of minLengths is
// consulted: a segment is "OK" once segLen equal characters reach a non-negative minimum
// length, or, with a negative minimum length, once the end of the segment is reached.
// NOTE(review): this numbered listing skips its lines 344 and 345, hence the conditional
// expression that assigns `same` is incomplete here. Presumably the missing part is the
// case-insensitive comparison of h and n - restore it from the original token.cpp.
// NOTE(review): isalpha/isupper are applied to values of type `character`; whether that
// type is always in the value range accepted by these functions is not visible here.
316bool Token::Match( const String& needle )
317{
318 ALIB_ASSERT_ERROR( needle.Length() > 0, "STRINGS/TOK",
319 "Empty search string in when matching function name." )
320 lang::Case sensitivity= Sensitivity();
321
322 Formats caseType= GetFormat();
323 bool isNormal= (caseType == Formats::Normal );
324 bool isCamel = (caseType == Formats::CamelCase );
325 bool isSnake = (caseType == Formats::SnakeCase );
326 bool isKebab = (caseType == Formats::KebabCase );
327
// state of the scan:
//   segNo/segLen  index of the current haystack segment and characters read from it so far
//   same          result of the last character comparison
//   rollbackLen   CamelCase only: counts needle characters that matched after the segment
//                 was already OK and which may be given back to the next segment
//   isSegOK       whether the current segment satisfied its minimum length
//   segMinLen     minimum length of the current segment (-2 beyond the seventh segment)
328 int segNo = 0;
329 int segLen = 0;
330 bool same = false;
331 integer hIdx = 0;
332 integer nIdx = 0;
333 integer rollbackLen = 0;
334 bool isSegOK = false;
335 int segMinLen = minLengths[0];
336 while( hIdx < definitionName.Length() )
337 {
338 // read current haystack and needle
339 ++segLen;
340 character h= definitionName .CharAt( hIdx++ );
341 character n= needle.CharAt( nIdx++ );
342
343 same= sensitivity == lang::Case::Ignore
346 : h
347 == n;
348
349 // special CamelCase treatment
350 if( isCamel )
351 {
352 // end of needle and final, omitable segment?
353 if( n == '\0' && segMinLen == 0)
354 return true;
355
356 // rollback
357 if( !same )
358 {
// mismatch on the first character of a segment: give one previously consumed needle
// character back and retry the same haystack character
359 if( segLen == 1 && rollbackLen > 0)
360 {
361 nIdx-= 2;
362 --rollbackLen;
363 --hIdx;
364 --segLen;
365 continue;
366 }
367
// otherwise the needle character is not consumed
368 --nIdx;
369 }
370
371 if( segLen == 1)
372 rollbackLen= 0;
373
374 else if( same && isSegOK )
375 ++rollbackLen;
376 }
377
378 // end of haystack segment?
379 bool isSegEnd= hIdx == definitionName.Length()
380 || (isSnake && h == '_' )
381 || (isKebab && h == '-' )
382 || (isCamel && isalpha(definitionName.CharAt( hIdx ))
383 && isupper(definitionName.CharAt( hIdx )) );
384
385 // update segOK flag
386 if( same )
387 {
388 isSegOK= ( ( segMinLen >= 0 && segLen >= segMinLen )
389 || ( segMinLen < 0 && isSegEnd ) );
390 }
391
392 // result false, if not same and first of actual segment
393 else if( segLen == 1 && segMinLen != 0 )
394 return false;
395
396
397 // end of segment and needle not empty?
398 if( isSegEnd && n != '\0')
399 {
400 if( !isSegOK )
401 return false;
402 }
403
404 // not same and either not end of segment or empty needle
405 else if( !same )
406 {
407 if( !isSegOK )
408 return false;
409
410 // skip rest of segment
411 while( h != '\0'
412 && ( ( isCamel && (!isalpha(h) || !isupper(h) ) )
413 || ( isSnake && h != '_' )
414 || ( isKebab && h != '-' ) ) )
415 h= definitionName.CharAt( hIdx++ );
416
// with CamelCase the upper case letter that stopped the skipping is read again
417 if( isCamel )
418 --hIdx;
419 }
420
421 // start new segment
422 if( !same || isSegEnd )
423 {
424 ++segNo;
425 segLen= 0;
426 segMinLen = segNo < 7 ? minLengths[segNo] : -2;
427
428 // oh,oh!
// needle exhausted: accept if the haystack is exhausted as well, if the token is of
// normal format or if the segment that starts now has a minimum length of 0
429 if( n == '\0' && (!isCamel || h == '\0' || rollbackLen == 0) )
430 return h == '\0' || isNormal || segMinLen == 0;
431 }
432 }
433
// haystack exhausted: the last comparison must have succeeded, the last segment must be
// OK and the needle must be consumed completely
434 return same && isSegOK && (nIdx == needle.Length());
435}
436
437} // namespace [alib::strings::util]
438
constexpr integer Length() const
Definition string.inl:318
TChar CharAtStart() const
Definition string.inl:440
TChar CharAt(integer idx) const
Definition string.inl:421
constexpr bool IsNotEmpty() const
Definition string.inl:371
bool ConsumeDecDigits(std::integral auto &result)
TSubstring & Trim(const TCString< TChar > &whiteSpaces=CStringConstantsTraits< TChar >::DefaultWhitespaces())
TString< TChar > ConsumeToken(TChar separator=',', lang::Inclusion includeSeparator=lang::Inclusion::Include)
@ ErrorReadingSensitivity
Sensitivity value not found.
Definition token.inl:151
@ TooManyMinLengthsGiven
A maximum of 7 minimum length values was exceeded.
Definition token.inl:153
@ ErrorReadingMinLengths
Error parsing the list of minimum lengths.
Definition token.inl:152
@ DefinitionStringNotConsumed
The definition string was not completely consumed.
Definition token.inl:160
ALIB_DLL void detectFormat()
Detects snake_case, kebab-case or CamelCase.
Definition token.cpp:183
int8_t GetMinLength(int idx) const
Definition token.inl:339
String definitionName
The tokens' definition string part.
Definition token.inl:168
const String & GetDefinitionName() const
Definition token.inl:274
lang::Case Sensitivity() const
Definition token.inl:320
Formats
Format types detected with detectFormat.
Definition token.inl:135
@ CamelCase
UpperCamelCase or lowerCamelCase.
Definition token.inl:139
@ SnakeCase
snake_case using underscores.
Definition token.inl:137
@ Normal
Normal, optionally abbreviated words.
Definition token.inl:136
@ KebabCase
kebab-case using hyphens.
Definition token.inl:138
Token()
Parameterless constructor. Creates an "undefined" token.
Definition token.inl:190
Formats format
Defines the "case type" as well as the letter case sensitivity of this token.
Definition token.inl:175
String exportName
The tokens' optional explicit export name.
Definition token.inl:171
static constexpr Formats ignoreCase
Letter case sensitivity. This is combined with the format bits.
Definition token.inl:183
Formats GetFormat() const
Definition token.inl:306
ALIB_DLL void Define(const String &definition, character separator=';')
Definition token.cpp:100
ALIB_DLL bool Match(const String &needle)
Definition token.cpp:316
ALIB_DLL void GetExportName(AString &target) const
Definition token.cpp:72
#define ALIB_ASSERT_ERROR(cond, domain,...)
Definition alib.inl:1049
#define ALIB_REL_DBG(releaseCode,...)
Definition alib.inl:838
TChar ToUpper(TChar c)
TChar ToLower(TChar c)
bool Parse(strings::TSubstring< TChar > &input, TEnum &result)
Case
Denotes upper and lower case character treatment.
@ Exclude
Chooses exclusion.
strings::TAString< character, lang::HeapAllocator > AString
Type alias in namespace alib.
lang::integer integer
Type alias in namespace alib.
Definition integers.inl:149
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2381
characters::character character
Type alias in namespace alib.
strings::TSubstring< character > Substring
Type alias in namespace alib.