ALib C++ Library
Library Version: 2412 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1// #################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2024 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6// #################################################################################################
8
9#if !DOXYGEN
14#endif // !DOXYGEN
15
16
17namespace alib { namespace expressions { namespace detail {
18
19// #################################################################################################
20// Parser
21// #################################################################################################
22
// Constructor of the parser: member initializers followed by the set-up of the lexer lookup tables.
// NOTE(review): the signature line (original line 23) and the loop header on original line 48 are
// missing from this listing - confirm both against the real source file.
24: compileTimeAllocator(allocator)
25, compiler (pCompiler)
26, unaryOperators (allocator)
27, binaryOperators (allocator)
28{
29 // characters to be known
// Parentheses and the comma are flagged as syntax tokens; question mark and colon are always
// flagged as operator characters.
30 syntaxTokens [static_cast<unsigned char>('(')]= true;
31 syntaxTokens [static_cast<unsigned char>(')')]= true;
32 syntaxTokens [static_cast<unsigned char>(',')]= true;
33
34 operatorChars[static_cast<unsigned char>('?')]= true;
35 operatorChars[static_cast<unsigned char>(':')]= true;
36
37 // define unary ops
// Each unary operator of the compiler is stored in the local set and every character of its
// symbol is flagged in operatorChars.
38 for( auto& op : compiler.UnaryOperators )
39 {
40 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
41 "Doubly defined unary operator symbol {!Q'}.", op )
42
43 unaryOperators.EmplaceUnique(op);
44 for( auto it : op )
45 operatorChars[static_cast<unsigned char>(it)]= true;
46 }
47
// The body below registers op.first as a unary operator. Presumably the missing header iterates
// compiler.AlphabeticUnaryOperatorAliases - TODO confirm.
// Characters are flagged as operator characters only if the symbol does not start with a letter.
// NOTE(review): isalpha() receives the character without the unsigned char cast that the table
// lookups use - verify behaviour for non-ASCII symbols.
49 {
50 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
51 "Doubly defined unary operator symbol {!Q'}.", op.first )
52
53 unaryOperators.EmplaceUnique(op.first);
54 if( !isalpha( op.first.CharAtStart() ) )
55 for( auto it : op.first )
56 operatorChars[static_cast<unsigned char>(it)]= true;
57 }
58
59
// Binary operators: the subscript operator is not stored in binaryOperators. Instead, its two
// bracket characters become syntax tokens. All other symbols are stored and their characters
// flagged as operator characters.
// NOTE(review): the loop variable is taken by value here (and in the alias loop below), while the
// unary loop above uses a reference.
60 for( auto op : compiler.BinaryOperators )
61 {
62 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
63 "Doubly defined binary operator symbol {!Q'}.", op.first )
64 if( op.first == A_CHAR("[]") )
65 {
66 syntaxTokens[static_cast<unsigned char>('[')]= true;
67 syntaxTokens[static_cast<unsigned char>(']')]= true;
68 }
69 else
70 {
71 binaryOperators.EmplaceUnique(op.first);
72 for( auto it : op.first )
73 operatorChars[static_cast<unsigned char>(it)]= true;
74 }
75 }
76
// Alphabetic aliases of binary operators: op.first is the alias, op.second the aliased operator.
// The alias is stored in binaryOperators in the same way as a regular operator symbol.
77 for( auto op : compiler.AlphabeticBinaryOperatorAliases )
78 {
79 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
80 "Doubly defined binary operator symbol {!Q'}.", op.first )
81
// The result of Find() is only named inside ALIB_DBG and only used by the assertion that follows.
// Presumably the call itself still executes in release builds with its result discarded - TODO confirm.
82 ALIB_DBG( auto originalOp= )
83 compiler.BinaryOperators.Find( op.second );
84 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
85 "Alias {!Q'} defined for unknown operator {!Q'}.",
86 op.first, op.second )
87
88 binaryOperators.EmplaceUnique(op.first);
89 if( !isalpha( op.first.CharAtStart() ) )
90 for( auto it : op.first )
91 operatorChars[static_cast<unsigned char>(it)]= true;
92 }
93}
94
95// #################################################################################################
96// Lexer
97// #################################################################################################
// Body of the lexer method, which stores the next token in the members token, tokString,
// tokInteger, tokFloat and tokLiteralHint.
// NOTE(review): this listing omits the signature (original line 98) and a number of statements,
// among them the declaration of the variable first, the calls that consume characters from scanner
// and the construction of the exception objects. Comments below describe only what is visible.
99{
102
// End of input reached: report the end-of-text token.
103 if( scanner.IsEmpty() )
104 {
105 token= Tokens::EOT;
106 return;
107 }
108
110
111 //------------------------------ Syntax Tokens ------------------------------
// A flagged syntax character is converted directly into the Tokens enumerator of the same value.
112 if( syntaxTokens[static_cast<unsigned char>(first)] )
113 {
114 token= Tokens(first);
116 return;
117 }
118
119 //------------------------------ Symbolic operators ------------------------------
120 // read up to 3 operator characters
121 if( operatorChars[static_cast<unsigned char>(first)] )
122 {
123 integer operatorLength= 1;
125 if( operatorChars[static_cast<unsigned char>(scanner.CharAtStart() ) ] )
126 {
128 ++operatorLength;
129
130 if( operatorChars[static_cast<unsigned char>(scanner.CharAtStart() ) ] )
131 {
133 ++operatorLength;
134 }
135 }
136
// The token string refers to the characters inside the expression buffer, it is not a copy.
137 token= Tokens::SymbolicOp;
139 tokString= String( expression.Buffer() + tokPosition, operatorLength );
141
142 // special treatment for Elvis with spaces "? :"
143 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) )
144 {
145 // patch existing token and return
// The scanner is saved first, so that the whitespace trimming can be undone if no colon follows.
146 Substring backup= scanner;
147 if( scanner.TrimStart().CharAtStart() == ':' )
148 {
149 tokString= A_CHAR("?:");
151 }
152 else
153 scanner= backup;
154 }
155 return;
156 }
157
158 //------------------------------ alphabetic operators ------------------------------
// A run of letters and underscores is looked up, with a case-insensitive hash code, first in the
// unary and then in the binary operator set. If neither matches, control falls through to the
// identifier branch below.
// NOTE(review): the first line of each two-line condition is continued by a line that is missing
// from this listing (original lines 171 and 184).
159 if( isalpha( first ) )
160 {
161 integer len= 1;
162 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
163 ++len;
164 tokString= scanner.Substring<NC>( 0, len );
165 auto hashCode= tokString.HashcodeIgnoreCase();
166
167 // unary
168 {
169 decltype(unaryOperators)::Iterator it;
170 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
172 || tokString.Equals<NC>( it.Value() ) ) )
173 {
175 token= Tokens::AlphaUnOp;
176 return;
177 }
178 }
179
180 // binary
181 {
182 decltype(binaryOperators)::Iterator it;
183 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
185 || tokString.Equals<NC>( it.Value() ) ) )
186 {
188 token= Tokens::AlphaBinOp;
189 return;
190 }
191 }
192
193 }
194
195 //------------------------------ Identifiers ------------------------------
// An identifier starts with a letter or an underscore and continues with letters, digits and
// underscores. The token string again refers to the expression buffer.
196 if( isalpha( first ) || first == '_' )
197 {
198 integer endOfIdent= 0;
199 character next= 0;
200 while( ++endOfIdent < scanner.Length()
201 && ( isalnum( next= scanner[endOfIdent] )
202 || next == '_' ) );
203
204 token= Tokens::Identifier;
206 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
208 scanner.ConsumeChars<NC>( endOfIdent );
209 return;
210 }
211
212 //------------------------------ numbers ------------------------------
// The leading digits are skipped first (group characters as well, if the number format allows
// reading them). The character that ended this scan decides between float and integer parsing.
213 if( isdigit( first ) )
214 {
215 integer endOfDecPart= 0;
216 character next= 0;
217 while( ++endOfDecPart < scanner.Length()
218 && ( isdigit( next= scanner[endOfDecPart] )
219 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
220 );
221
222
223 // float number
// NOTE(review): the end of this condition and the call that fills value are on lines missing from
// this listing. The hint expression below is likewise cut off.
224 if( next == numberFormat->DecimalPointChar
225 || next == 'e'
226 || next == 'E'
228
229 {
230 auto oldStart= scanner.Buffer();
231 double value;
233 token = Tokens::LitFloat;
234 tokFloat= value;
235
// The consumed characters are inspected for an exponent marker to derive the literal hint.
236 String numberParsed( oldStart, scanner.Buffer() - oldStart );
237 tokLiteralHint= numberParsed.IndexOf('e') > 0
238 || numberParsed.IndexOf('E') > 0
239 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
242 }
243
244 // integer number
245 else
246 {
254
255 integer value;
257 token= Tokens::LitInteger;
258 tokInteger= value;
259 }
260
261 return;
262 }
263
264 //------------------------------ Strings ------------------------------
// Characters are consumed up to the first double quote that is not preceded by a backslash.
// If the loop ends on the zero character instead, an exception is thrown.
// NOTE(review): lastWasSlash is set on every backslash and never toggled, so a quote that follows
// an escaped backslash (two backslashes) is still treated as escaped - verify whether intended.
265 if( first == '"' )
266 {
267 bool lastWasSlash= false;
269 character next;
270 while( (next= scanner.ConsumeChar()) != '\0' )
271 {
272 if( next == '\\' ) { lastWasSlash= true; continue; }
273 if( next == '\"' && !lastWasSlash ) break;
274 lastWasSlash= false;
275 }
276
277 if( next != '"' )
278 {
280 EXPRESSIONS.GetResource("EE4") );
283 throw e;
284 }
285
287 String quoted( expression.Buffer() + tokPosition + 1,
290 token = Tokens::LitString;
292 return;
293 }
294
295 // -------- unrecognized token ---------
298 throw e;
299}
300
301
302
303// #################################################################################################
304// Parser
305// #################################################################################################
// Start symbol of the grammar: each Start() call in the parse methods below expands to a call of
// parseConditional(). The macro is removed again with #undef at the end of the file.
306#define Start parseConditional
307
// Body of the parse entry point: stores the expression string and number format, loads the first
// token, parses from the start symbol and returns the resulting AST.
// NOTE(review): the signature (original line 308) is missing from this listing, as are the
// statement guarded by the emptiness check (line 311) and the exception set-up before each throw.
309{
// An empty expression string is handled by the statement on the missing line 311. As listed here,
// the if would wrongly appear to guard the assignment that follows.
310 if( exprString.IsEmpty() )
312
313 expression = exprString;
314 numberFormat= nf;
316 ASTs->reserve(20);
317
318 // load first token
320 NextToken();
321
322//ALIB_DBG( lexer.DbgListTokens(); )
323
324 AST* ast= Start();
325
326
327 // if tokens remain, an "operator" would be expected
328 if( token != Tokens::EOT )
329 {
332 throw e;
333 }
334
335 return ast;
336}
337
338
// Body of parseConditional: parses a binary expression Q and, if a question mark operator follows,
// the two branches T and F of a conditional, each parsed from the start symbol.
// Returns an ASTConditional node, or Q unchanged if no question mark follows.
// NOTE(review): the signature (original line 339) and the exception set-up are missing here.
340{
341 // parse the condition (lhs) as a binary expression
342 push( parseBinary() ); // Q
343
// Remember where the question mark is expected. The position is passed to the conditional node.
344 integer qmPosition= tokPosition;
345
346
347 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") )
348 {
349 NextToken();
350 push( Start() ); // T
351
352 // expect colon
353 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") )
354 {
357 throw e;
358 }
359 integer colonPosition= tokPosition;
360
361 NextToken();
362
// T and Q are taken back from the stack in reverse order of their insertion.
363 AST* F= Start();
364 AST* T= pop();
365 AST* Q= pop();
366 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
367 }
368
369 // was no conditional
370 return pop();
371}
372
// Body of parseBinary: parses a simple expression followed by any number of binary operators and
// right-hand sides, and arranges the resulting binary nodes according to operator precedence.
// NOTE(review): the signature (original line 373) and the exception set-up are missing here.
374{
375 // parse lhs as simple
376 push( parseSimple() );
377
378 // parse
379 integer position= tokPosition;
380 String binOp;
381 for( ;; )
382 {
// No binary operator follows: the expression ends here and the lhs is the result.
383 binOp= getBinaryOp();
384 if( binOp.IsNull() )
385 return pop();
386
387 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
388 if( token == Tokens::BraceOpen )
389 {
390 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
391 position= tokPosition;
392 continue;
393 }
394 break;
395 }
396
397 // check if tokens remain
398 if( token == Tokens::EOT )
399 {
402 throw e;
403 }
404
// The rest of the expression is parsed recursively as the right-hand side.
405 AST* lhs= top();
406 AST* rhs= push( parseBinary() );
407
// Walk down the left spine of rhs while its nodes are binary operators whose precedence is lower
// than or equal to that of binOp. The last such node becomes parent.
// NOTE(review): the local variable replace shadows the helper replace() called above.
// NOTE(review): the dynamic_cast results are dereferenced unchecked, relying on the NodeType test.
408 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
409 AST* replace = rhs;
410 ASTBinaryOp* parent = nullptr;
411 while( replace->NodeType == AST::Types::BinaryOp
412 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
413 {
414 parent = dynamic_cast<ASTBinaryOp*>(replace);
415 replace= parent->Lhs;
416 }
417
// Remove rhs and lhs from the stack. If no such node was found, binOp simply joins lhs and rhs.
418 pop();
419 pop();
420 if( parent == nullptr )
421 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
422
423 // insert binary at lhs of deepest equal-level binary found.
424 // Its current lhs becomes its new lhs-child's rhs.
425 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
426 return rhs;
427}
428
// Body of parseSimple: parses a bracketed expression, a unary operation, a literal, a function
// call or an identifier. Most results are passed through parseSubscript before being returned.
// For any other token an exception is thrown.
// NOTE(review): the signature (original line 429) and the exception set-up before each throw are
// missing from this listing.
430{
431 // '(' expr ')' (brackets)
432 if( token == Tokens::BraceOpen )
433 {
434 NextToken();
435 push( Start() );
436
437 if( token != Tokens::BraceClose )
438 {
441 throw e;
442 }
443 NextToken();
444 replace( parseSubscript( top() ) );
445 return pop();
446 }
447
448 // unary operator
// The operand is parsed by a recursive call, hence unary operators may be chained.
449 integer position= tokPosition;
450 {
451 String unOp= getUnaryOp();
452 if( unOp.IsNotNull() )
453 {
454 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
455 replace( parseSubscript( top() ) );
456 return pop();
457 }
458 }
459
460 // terminals
// Literals become ASTLiteral nodes. For string literals, tokString is passed to a String that is
// constructed with the compile-time allocator.
461 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
462 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
463 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
464 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) // allow bin op's names here! This is tricky but right!
465 {
466 String name= tokString;
467 NextToken();
468
469 // function
// An opening brace after the name starts an argument list. Each argument is parsed from the start
// symbol and appended to the function node.
// NOTE(review): the path for a closing brace found at the top of the loop returns without calling
// parseSubscript, unlike the path after the last argument - verify whether intended.
// NOTE(review): that same path is also reached after a comma, so a trailing comma is accepted.
470 if( token == Tokens::BraceOpen )
471 {
472 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
473 push( astFunction );
474 for(;;)
475 {
476 NextToken();
477 if( token == Tokens::BraceClose )
478 {
479 NextToken();
480 return pop();
481 }
482 astFunction->Arguments.EmplaceBack( Start() );
483
484 if( token == Tokens::Comma )
485 continue;
486
487 if( token != Tokens::BraceClose )
488 {
491 throw e;
492 }
493
494 NextToken();
495 replace( parseSubscript( astFunction ) );
496 return pop();
497 }
498 }
499
500 // identifier
501 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>( String(compileTimeAllocator, name), position ) ) ) );
502 return pop();
503 }
504
505 // ---------------------------------------- ERRORS -----------------------------------------
// Each of the remaining token types leads to its own exception.
506 if( token == Tokens::EOT )
507 {
510 throw e;
511 }
512
513 if( token == Tokens::BraceClose )
514 {
517 throw e;
518 }
519
520 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose )
521 {
524 throw e;
525 }
526
527 if( token == Tokens::Comma )
528 {
531 throw e;
532 }
533
534 ALIB_ERROR( "EXPR", "Internal error. This should never happen.")
535 return nullptr;
536}
537
// Body of parseSubscript: if a subscript follows, parses the bracketed index expression and
// returns a binary node with the subscript operator, the given node as lhs and the index as rhs.
// Otherwise the given node is returned unchanged.
// NOTE(review): the signature (original line 538), the first half of the guard condition
// (line 540) and the exception set-up are missing from this listing.
539{
541 || token != Tokens::SubscriptOpen )
542 return function;
543
544 integer position= tokPosition;
545
546 NextToken();
547
// The index expression is parsed from the start symbol.
548 push( Start() );
549
550 if( token != Tokens::SubscriptClose )
551 {
554 throw e;
555 }
556
557 // success
558 NextToken();
559 return compileTimeAllocator().New<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
560}
561
562
563// #################################################################################################
564// Helpers
565// #################################################################################################
566
567
// Body of getUnaryOp: returns the unary operator found at the current token, or NULL_STRING if
// the token is neither a symbolic nor an alphabetic unary operator token.
// Throws if a symbolic operator token has no prefix that is a known unary operator.
// NOTE(review): the signature (original line 568), the exception set-up and two lines inside the
// else branch (583 and 587) are missing from this listing.
569{
570 if( token == Tokens::SymbolicOp )
571 {
572 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
573 // token only if all is consumed.
// Prefixes are tested with increasing length, so the shortest matching operator wins.
574 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead )
575 {
576 Substring key= Substring( tokString.Buffer(), partialRead );
577 if( unaryOperators.Contains( key ) )
578 {
579 if( partialRead == tokString.Length() )
580 NextToken();
581 else
582 {
// Partial match: keep the current token and cut the matched prefix off its string and position.
584 tokString= String( tokString.Buffer() + partialRead,
585 tokString.Length() - partialRead );
586 tokPosition+= partialRead;
588 }
589 return key;
590 }
591 }
594 throw e;
595 }
596 else if ( token == Tokens::AlphaUnOp )
597 {
// Alphabetic operators are always consumed as a whole.
598 String alphabeticOperator= tokString;
599 NextToken();
600 return alphabeticOperator;
601 }
602
603 return NULL_STRING;
604}
605
// Body of getBinaryOp: returns the binary operator found at the current token, or NULL_STRING if
// the token is no binary operator token or is one of the two symbols of the ternary operator.
// Throws if a symbolic operator token has no prefix that is a known binary operator.
// NOTE(review): the signature (original line 606), the exception set-up and two lines inside the
// else branch (625 and 629) are missing from this listing.
607{
608 if ( token == Tokens::SymbolicOp )
609 {
610 // ignore ternary
611 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
612 return NULL_STRING;
613
614 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
615 // but are happy with less than available
// Prefixes are tested with decreasing length, so the longest matching operator wins.
616 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead )
617 {
618 Substring key = Substring( tokString.Buffer(), partialRead );
619 if ( binaryOperators.Contains( key ) )
620 {
621 if ( partialRead == tokString.Length() )
622 NextToken();
623 else
624 {
// Partial match: the remaining characters stay in the current token.
626 tokString = String( tokString.Buffer() + partialRead,
627 tokString.Length() - partialRead );
628 tokPosition += partialRead;
630 }
631 return key;
632 }
633 }
634
637 throw e;
638 }
639 else if ( token == Tokens::AlphaBinOp )
640 {
// Alphabetic operators are always consumed as a whole.
641 String alphabeticOperator= tokString;
642 NextToken();
643 return alphabeticOperator;
644 }
645
646 return NULL_STRING;
647}
648
649
650#undef Start
651
652}}} // namespace [alib::expressions::detail]
HashMap< MonoAllocator, String, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > AlphabeticUnaryOperatorAliases
Definition compiler.hpp:132
HashMap< MonoAllocator, String, int > BinaryOperators
Definition compiler.hpp:169
List< MonoAllocator, String > UnaryOperators
Definition compiler.hpp:121
int GetBinaryOperatorPrecedence(const String &symbol)
Definition compiler.hpp:190
Compilation CfgCompilation
Compilation flags.
Definition compiler.hpp:273
ASTLiteral::NFHint tokLiteralHint
Hint on the number format of the current literal token (e.g., scientific or hexadecimal), if applicable.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
virtual ALIB_API detail::AST * Parse(const String &exprString, NumberFormat *nf) override
integer tokPosition
The position of the token in expression.
Tokens token
The actual token type.
integer tokInteger
Integer value of token (if applicable).
void NextToken()
This is the "scanner" or "lexer" method.
NumberFormat * numberFormat
Used for scanning literals. Provided to this class with each parse request.
Substring scanner
The rest of expression.
String expression
The given expression to parse.
Compiler & compiler
The compiler that this parser works for.
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
String tokString
String value of token (if applicable).
double tokFloat
Float value of token (if applicable).
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
const String & GetResource(const NString &name)
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
constexpr bool IsNull() const
Definition string.hpp:364
integer IndexOf(TChar needle, integer startIdx=0) const
Definition string.hpp:896
constexpr bool IsEmpty() const
Definition string.hpp:383
void Allocate(TAllocator &allocator, const TString< TChar > &copy)
Definition string.hpp:2012
constexpr bool IsNotEmpty() const
Definition string.hpp:389
constexpr integer Length() const
Definition string.hpp:326
TChar CharAtStart() const
Definition string.hpp:466
constexpr bool IsNotNull() const
Definition string.hpp:371
std::size_t HashcodeIgnoreCase() const
TString< TChar > Substring(integer regionStart, integer regionLength=MAX_LEN) const
Definition string.hpp:406
bool Equals(const TString< TChar > &rhs) const
Definition string.hpp:580
bool StartsWith(const TString &needle) const
Definition string.hpp:820
constexpr const TChar * Buffer() const
Definition string.hpp:319
ALIB_API bool ConsumeFloat(double &result, TNumberFormat< TChar > *numberFormat=nullptr)
integer ConsumeChars(integer regionLength, TSubstring *target=nullptr)
TSubstring & TrimStart(const TCString< TChar > &whiteSpaces=TT_CStringConstants< TChar >::DefaultWhitespaces())
Definition substring.hpp:89
bool ConsumeInt(TIntegral &result, TNumberFormat< TChar > *numberFormat=nullptr)
#define ALIB_CALLER_NULLED
Definition alib.hpp:1173
#define ALIB_CALLER
Definition alib.hpp:1164
#define A_CHAR(STR)
#define ALIB_WARNINGS_RESTORE
Definition alib.hpp:849
#define ALIB_ERROR(...)
Definition alib.hpp:1267
#define ALIB_WARNINGS_ALLOW_UNSAFE_BUFFER_USAGE
Definition alib.hpp:760
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ EmptyExpressionString
Thrown when an empty string is tried to be compiled.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
@ SyntaxError
General error thrown by the parser.
@ Off
Switch it off, switched off, etc.
Definition alib.cpp:69
lang::Exception Exception
Type alias in namespace alib.
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of ALib Camp class ExpressionsCamp.
std::vector< T, SCAMono< T > > StdVectorMono
Type alias in namespace alib.
Definition stdvector.hpp:21
strings::TSubstring< character > Substring
Type alias in namespace alib.
characters::character character
Type alias in namespace alib.
strings::TString< character > String
Type alias in namespace alib.
LocalString< 1024 > String1K
Type alias name for TLocalString<character,1024>.
constexpr String NULL_STRING
A nulled string of the default character type.
Definition string.hpp:2549
lang::integer integer
Type alias in namespace alib.
Definition integers.hpp:273
Abstract syntax tree node representing binary operators.
Definition ast.hpp:234
String Operator
The operator symbol.
Definition ast.hpp:235
AST * Lhs
The left-hand-side expression node.
Definition ast.hpp:236
Abstract syntax tree node representing ternary operator Q ? T : F.
Definition ast.hpp:267
Abstract syntax tree node representing a function call.
Definition ast.hpp:167
List< MonoAllocator, AST * > Arguments
The argument nodes.
Definition ast.hpp:169
@ Scientific
Float was given in scientific format.
@ Hexadecimal
Integral value was given in hexadecimal format.
@ Binary
Integral value was given in binary format.
@ Octal
Integral value was given in octal format.
Types NodeType
Type of this derived AST node.
Definition ast.hpp:38
TCString< TChar > BinLiteralPrefix
NumberFormatFlags Flags
The flag field.
TCString< TChar > OctLiteralPrefix
TCString< TChar > HexLiteralPrefix
TCString< TChar > ExponentSeparator