ALib C++ Library
Library Version: 2402 R1
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1// #################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2024 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6// #################################################################################################
8
9#if !defined(ALIB_DOX)
10# if !defined (HPP_ALIB_EXPRESSIONS_DETAIL_PARSER_IMPL)
12# endif
13
14# if !defined (HPP_ALIB_EXPRESSIONS_COMPILER)
16# endif
17
18# if !defined (HPP_ALIB_MONOMEM_MASTRING)
20# endif
21# if !defined (HPP_ALIB_STRINGS_FORMAT)
23# endif
24# if !defined (HPP_ALIB_LANG_CAMP_INLINES)
26# endif
27
28#endif // !defined(ALIB_DOX)
29
30
31namespace alib { namespace expressions { namespace detail {
32
33// #################################################################################################
34// Parser
35// #################################################################################################
36
// Constructor of the parser. The signature (source line 37) is missing from this listing; the
// member index at the end of this page gives it as
// `ParserImpl(Compiler &compiler, MonoAllocator *allocator)`.
// It fills the lexer lookup tables 'syntaxTokens' and 'operatorChars' and copies the operator
// symbols known to the compiler into the hash sets 'unaryOperators' and 'binaryOperators'.
38: compiler ( pCompiler )
39, unaryOperators ( allocator )
40, binaryOperators( allocator )
41{
42 // characters to be known
43 syntaxTokens [static_cast<unsigned char>('(')]= true;
44 syntaxTokens [static_cast<unsigned char>(')')]= true;
45 syntaxTokens [static_cast<unsigned char>(',')]= true;
46
// '?' and ':' are always flagged as operator characters, independent of the operators
// registered with the compiler.
47 operatorChars[static_cast<unsigned char>('?')]= true;
48 operatorChars[static_cast<unsigned char>(':')]= true;
49
50 // define unary ops
// Every character of every unary operator symbol is flagged in 'operatorChars'.
51 for( auto& op : compiler.UnaryOperators )
52 {
53 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
54 "Doubly defined unary operator symbol {!Q'}.", op )
55
56 unaryOperators.EmplaceUnique(op);
57 for( auto it : op )
58 operatorChars[static_cast<unsigned char>(it)]= true;
59 }
60
// NOTE(review): the loop header (source line 61) is missing from this listing. The body works on
// pairs and registers 'op.first' as a unary operator; presumably the loop iterates
// compiler.AlphabeticUnaryOperatorAliases - confirm against the original file.
// Only symbols that do not start with a letter contribute to 'operatorChars'.
62 {
63 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
64 "Doubly defined unary operator symbol {!Q'}.", op.first )
65
66 unaryOperators.EmplaceUnique(op.first);
67 if( !isalpha( op.first.CharAtStart() ) )
68 for( auto it : op.first )
69 operatorChars[static_cast<unsigned char>(it)]= true;
70 }
71
72
// Binary operators. The symbol "[]" is not stored in 'binaryOperators'; instead, '[' and ']'
// are enabled as syntax tokens. All other symbols are stored and flag their characters.
73 for( auto op : compiler.BinaryOperators )
74 {
75 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
76 "Doubly defined binary operator symbol {!Q'}.", op.first )
77 if( op.first == A_CHAR("[]") )
78 {
79 syntaxTokens[static_cast<unsigned char>('[')]= true;
80 syntaxTokens[static_cast<unsigned char>(']')]= true;
81 }
82 else
83 {
84 binaryOperators.EmplaceUnique(op.first);
85 for( auto it : op.first )
86 operatorChars[static_cast<unsigned char>(it)]= true;
87 }
88 }
89
// NOTE(review): the loop header (source line 90) is missing from this listing. The body treats
// 'op.first' as an alias and 'op.second' as the operator it maps to; presumably the loop iterates
// compiler.AlphabeticBinaryOperatorAliases - confirm against the original file.
91 {
92 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
93 "Doubly defined binary operator symbol {!Q'}.", op.first )
94
// The aliased operator is looked up in compiler.BinaryOperators. The result is stored only
// inside ALIB_DBG(...) (presumably active in debug builds only) and checked by the assertion.
95 ALIB_DBG( auto originalOp= )
96 compiler.BinaryOperators.Find( op.second );
97 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
98 "Alias {!Q'} defined for unknown operator {!Q'}.",
99 op.first, op.second )
100
// As with unary aliases: only aliases not starting with a letter flag operator characters.
101 binaryOperators.EmplaceUnique(op.first);
102 if( !isalpha( op.first.CharAtStart() ) )
103 for( auto it : op.first )
104 operatorChars[static_cast<unsigned char>(it)]= true;
105 }
106}
107
108// #################################################################################################
109// Lexer
110// #################################################################################################
// Lexer routine. The signature (source line 111) and source lines 113-114 are missing from this
// listing; presumably this is the NextToken() method that the parser methods call - confirm.
// Classifies the text at the start of 'scanner' and stores the result in 'token' and, depending
// on the token type, in 'tokString', 'tokInteger', 'tokFloat' and 'tokLiteralHint'.
// The branches are tested in this order: end of text, syntax tokens, symbolic operators,
// alphabetic operators, identifiers, numbers, string literals. Anything else ends in a throw.
// NOTE(review): every 'throw e;' below relies on lines that are missing from this listing
// to declare and fill the exception object 'e'.
112{
115
// Nothing left to read: report end of text.
116 if( scanner.IsEmpty() )
117 {
118 token= Tokens::EOT;
119 return;
120 }
121
122 character first= scanner.CharAtStart<false>();
123
124 //------------------------------ Syntax Tokens ------------------------------
// The token value is the character itself, converted to the Tokens enumeration.
// NOTE(review): source line 128 is missing here; presumably it consumes the character - confirm.
125 if( syntaxTokens[static_cast<unsigned char>(first)] )
126 {
127 token= Tokens(first);
129 return;
130 }
131
132 //------------------------------ Symbolic operators ------------------------------
133 // read up to 3 operator characters
// NOTE(review): source lines 137, 140 and 145 are missing here; presumably each consumes one
// character from 'scanner' before the next CharAtStart() test - confirm.
134 if( operatorChars[static_cast<unsigned char>(first)] )
135 {
136 integer operatorLength= 1;
138 if( operatorChars[static_cast<unsigned char>(scanner.CharAtStart() ) ] )
139 {
141 ++operatorLength;
142
143 if( operatorChars[static_cast<unsigned char>(scanner.CharAtStart() ) ] )
144 {
146 ++operatorLength;
147 }
148 }
149
// The token string refers directly into the expression buffer (no copy is made here).
150 token= Tokens::SymbolicOp;
152 tokString= String( expression.Buffer() + tokPosition, operatorLength );
154
155 // special treatment for Elvis with spaces "? :"
// Applies only if the compiler knows a binary operator "?:". If the next non-whitespace
// character is ':', the token is rewritten to "?:"; otherwise the scanner is restored.
// NOTE(review): source line 163 is missing; presumably it consumes the ':' - confirm.
156 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) )
157 {
158 // patch existing token and return
159 Substring backup= scanner;
160 if( scanner.TrimStart().CharAtStart() == ':' )
161 {
162 tokString= A_CHAR("?:");
164 }
165 else
166 scanner= backup;
167 }
168 return;
169 }
170
171 //------------------------------ alphabetic operators ------------------------------
// Collects a run of letters and underscores starting at a letter and looks it up, using a
// case-insensitive hash code, first in 'unaryOperators' and then in 'binaryOperators'.
// If neither lookup matches, control falls through to the identifier branch below.
172 if( isalpha( first ) )
173 {
174 integer len= 1;
175 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
176 ++len;
177 tokString= scanner.Substring<false>( 0, len );
178 auto hashCode= tokString.HashcodeIgnoreCase();
179
180 // unary
// NOTE(review): source lines 184 and 187 are missing, so the condition below is incomplete in
// this listing (its parentheses do not balance as shown).
181 {
182 decltype(unaryOperators)::Iterator it;
183 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
185 || tokString.Equals<false>( it.Value() ) ) )
186 {
188 token= Tokens::AlphaUnOp;
189 return;
190 }
191 }
192
193 // binary
// NOTE(review): source lines 197 and 200 are missing; same remark as for the unary lookup.
194 {
195 decltype(binaryOperators)::Iterator it;
196 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
198 || tokString.Equals<false>( it.Value() ) ) )
199 {
201 token= Tokens::AlphaBinOp;
202 return;
203 }
204 }
205
206 }
207
208 //------------------------------ Identifiers ------------------------------
// An identifier starts with a letter or '_' and continues with letters, digits or '_'.
// The empty-bodied while loop only advances 'endOfIdent' to the first non-identifier character.
209 if( isalpha( first ) || first == '_' )
210 {
211 integer endOfIdent= 0;
212 character next= 0;
213 while( ++endOfIdent < scanner.Length()
214 && ( isalnum( next= scanner[endOfIdent] )
215 || next == '_' ) );
216
217 token= Tokens::Identifier;
219 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
221 scanner.ConsumeChars<false>( endOfIdent );
222 return;
223 }
224
225 //------------------------------ numbers ------------------------------
// Skips the leading run of digits (and thousands group characters, if the number format has
// flag ReadGroupChars set). 'next' then holds the last character inspected by the loop, which
// decides between a floating point and an integral literal.
226 if( isdigit( first ) )
227 {
228 integer endOfDecPart= 0;
229 character next= 0;
230 while( ++endOfDecPart < scanner.Length()
231 && ( isdigit( next= scanner[endOfDecPart] )
232 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
233 );
234
235
236 // float number
// NOTE(review): source line 240 (end of this condition) and line 245 (presumably the call that
// parses the number into 'value') are missing from this listing - confirm.
237 if( next == numberFormat->DecimalPointChar
238 || next == 'e'
239 || next == 'E'
241
242 {
243 auto oldStart= scanner.Buffer();
244 double value;
246 token = Tokens::LitFloat;
247 tokFloat= value;
248
// The consumed text is inspected for an exponent marker to derive 'tokLiteralHint'.
// NOTE(review): source lines 253-254 (the remainder of this expression) are missing.
249 String numberParsed( oldStart, scanner.Buffer() - oldStart );
250 tokLiteralHint= numberParsed.IndexOf('e') > 0
251 || numberParsed.IndexOf('E') > 0
252 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
255 }
256
257 // integer number
// NOTE(review): source lines 260-266 and 269 are missing; presumably they determine the literal
// hint and parse the number into 'value' - confirm.
258 else
259 {
267
268 integer value;
270 token= Tokens::LitInteger;
271 tokInteger= value;
272 }
273
274 return;
275 }
276
277 //------------------------------ Strings ------------------------------
// Scans for the closing double quote. A quote directly preceded by a backslash does not end the
// literal.
// NOTE(review): 'lastWasSlash' is set for every backslash, including the second one of an
// escaped backslash. As written, a literal ending in "\\" followed by the closing quote is not
// terminated at that quote - verify whether this is intended.
278 if( first == '"' )
279 {
280 bool lastWasSlash= false;
281 scanner.ConsumeChar<false>();
282 character next;
283 while( (next= scanner.ConsumeChar()) != '\0' )
284 {
285 if( next == '\\' ) { lastWasSlash= true; continue; }
286 if( next == '\"' && !lastWasSlash ) break;
287 lastWasSlash= false;
288 }
289
// No closing quote was found before the end of the text: raise an exception.
// NOTE(review): source lines 292 and 294-295 (construction of 'e') are missing.
290 if( next != '"' )
291 {
293 EXPRESSIONS.GetResource("EE4") );
296 throw e;
297 }
298
// NOTE(review): source lines 299 and 301-302 are missing, so the definition of 'quoted' is
// incomplete in this listing.
300 String quoted( expression.Buffer() + tokPosition + 1,
303
304
// The quoted text is copied into a string allocated with the compile-time allocator and passed
// through Format::Escape with lang::Switch::Off before it becomes the token string.
305 MAString internalizer( *compileTimeAllocator, quoted.Length() + 1 );
306 internalizer << quoted << Format::Escape( lang::Switch::Off );
307 token = Tokens::LitString;
308 tokString= internalizer;
309 return;
310 }
311
312 // -------- unrecognized token ---------
// NOTE(review): source lines 313-314 (construction of 'e') are missing.
315 throw e;
316}
317
318
319
320// #################################################################################################
321// Parser
322// #################################################################################################
323#define Start parseConditional
324
// Entry point of the parser. The signature (source line 325) is missing from this listing; the
// member index at the end of this page gives it as
// `detail::AST* Parse(const String &exprString, NumberFormat *nf, MonoAllocator *allocator)`.
// Stores the expression string and number format, sets up the AST stack 'ASTs', reads the first
// token and parses one complete expression with Start() (a macro defined above as
// parseConditional). Returns the root node of the resulting syntax tree.
326{
// NOTE(review): source line 328 (the statement executed for an empty expression string) is
// missing from this listing; as shown, the 'if' would guard the assignment below.
327 if( exprString.IsEmpty() )
329
330 expression = exprString;
331 numberFormat = nf;
// NOTE(review): source line 332 is missing; 'ba' used below is not declared in the visible
// lines and presumably is set there - confirm.
// The stack of AST nodes is created inside that allocator with room for 20 entries reserved.
333 ASTs= ba->Emplace<std::vector<AST*, StdContMA<AST*>>>( StdContMA<AST*>(*ba) );
334 ASTs->reserve(20);
335
336 // load first token
// NOTE(review): source line 337 is missing; presumably it initializes 'scanner' - confirm.
338 NextToken();
339
340//ALIB_DBG( lexer.DbgListTokens(); )
341
342 AST* ast= Start();
343
344
345 // if tokens remain, an "operator" would be expected
// NOTE(review): source lines 348-349 (construction of 'e') are missing.
346 if( token != Tokens::EOT )
347 {
350 throw e;
351 }
352
353 return ast;
354}
355
356
// Parses a conditional expression "Q ? T : F". The signature (source line 357) is missing from
// this listing; presumably this is parseConditional(), which the macro 'Start' expands to -
// confirm. If no '?' follows the first operand, that operand is returned unchanged.
// Intermediate nodes are kept on the AST stack through push()/pop() while sub-expressions are
// parsed.
358{
359 // parse the condition (lhs) as a binary-level expression
360 push( parseBinary() ); // Q
361
// Position of the current token, i.e. of the '?' if one follows.
362 integer qmPosition= tokPosition;
363
364
365 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") )
366 {
// The 'true' branch is parsed with Start(), hence may itself be a conditional.
367 NextToken();
368 push( Start() ); // T
369
370 // expect colon
// NOTE(review): source lines 373-374 (construction of 'e') are missing from this listing.
371 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") )
372 {
375 throw e;
376 }
377 integer colonPosition= tokPosition;
378
379 NextToken();
380
// T and Q are popped in reverse order of their insertion.
381 AST* F= Start();
382 AST* T= pop();
383 AST* Q= pop();
384 return compileTimeAllocator->Emplace<ASTConditional>( Q, T, F, qmPosition, colonPosition );
385 }
386
387 // was no conditional
388 return pop();
389}
390
// Parses a chain of binary operations. The signature (source line 391) is missing from this
// listing; presumably this is parseBinary() - confirm.
// The left-hand side is parsed with parseSimple(). The right-hand side is parsed recursively
// and the resulting tree is then re-arranged according to the operator precedences reported by
// compiler.GetBinaryOperatorPrecedence().
392{
393 // parse lhs as simple
394 push( parseSimple() );
395
396 // parse
397 integer position= tokPosition;
398 String binOp;
399 for( ;; )
400 {
// No binary operator follows: the lhs alone is the result.
401 binOp= getBinaryOp();
402 if( binOp.IsNull() )
403 return pop();
404
405 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
406 if( token == Tokens::BraceOpen )
407 {
408 replace( compileTimeAllocator->Emplace<ASTBinaryOp>( binOp, top(), parseSimple(), position ) );
409 position= tokPosition;
410 continue;
411 }
412 break;
413 }
414
415 // check if tokens remain
// An operator was read but no operand follows.
// NOTE(review): source lines 418-419 (construction of 'e') are missing from this listing.
416 if( token == Tokens::EOT )
417 {
420 throw e;
421 }
422
423 AST* lhs= top();
424 AST* rhs= push( parseBinary() );
425
// Walk down the chain of 'Lhs' children of rhs for as long as the nodes are binary operators
// whose precedence is lower than or equal to that of 'binOp'. Afterwards 'parent' is the deepest
// such node, or nullptr if rhs itself does not qualify.
// Note: the local variable 'replace' hides the helper replace() used above for the rest of this
// function.
426 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
427 AST* replace = rhs;
428 ASTBinaryOp* parent = nullptr;
429 while( replace->NodeType == AST::Types::BinaryOp
430 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
431 {
432 parent = dynamic_cast<ASTBinaryOp*>(replace);
433 replace= parent->Lhs;
434 }
435
// Remove rhs and lhs from the AST stack.
436 pop();
437 pop();
// No re-arrangement needed: create a plain <lhs binOp rhs> node.
438 if( parent == nullptr )
439 return compileTimeAllocator->Emplace<ASTBinaryOp>( binOp, lhs, rhs, position );
440
441 // insert binary at lhs of deepest equal-level binary found.
442 // Its current lhs becomes its new lhs-child's rhs.
443 parent->Lhs= compileTimeAllocator->Emplace<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
444 return rhs;
445}
446
// Parses a "simple" operand. The signature (source line 447) is missing from this listing;
// presumably this is parseSimple() - confirm. Handled in this order:
//  - an expression in round brackets,
//  - a unary operator followed by a simple operand,
//  - integer, float and string literals,
//  - identifiers and function calls.
// Most of these paths pass the resulting node to parseSubscript() to allow a trailing subscript.
// Any other token ends in a throw.
// NOTE(review): the lines that declare and fill the exception object 'e' before each 'throw e;'
// are missing from this listing.
448{
449 // '(' expr ')' (brackets)
450 if( token == Tokens::BraceOpen )
451 {
452 NextToken();
453 push( Start() );
454
// The closing bracket is mandatory (source lines 457-458, construction of 'e', are missing).
455 if( token != Tokens::BraceClose )
456 {
459 throw e;
460 }
461 NextToken();
462 replace( parseSubscript( top() ) );
463 return pop();
464 }
465
466 // unary operator
// The operand of a unary operator is parsed recursively, so unary operators may be chained.
467 integer position= tokPosition;
468 {
469 String unOp= getUnaryOp();
470 if( unOp.IsNotNull() )
471 {
472 push( compileTimeAllocator->Emplace<ASTUnaryOp>( unOp, parseSimple(), position ) );
473 replace( parseSubscript( top() ) );
474 return pop();
475 }
476 }
477
478 // terminals
// String literals are copied with EmplaceString() before they are stored in the literal node.
479 if( token == Tokens::LitInteger ) { push(compileTimeAllocator->Emplace<ASTLiteral>( tokInteger, position, tokLiteralHint )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
480 if( token == Tokens::LitFloat ) { push(compileTimeAllocator->Emplace<ASTLiteral>( tokFloat , position, tokLiteralHint )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
481 if( token == Tokens::LitString ) { push(compileTimeAllocator->Emplace<ASTLiteral>( compileTimeAllocator->EmplaceString(tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
482 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) // allow bin op's names here! This is tricky but right!
483 {
484 String name= tokString;
485 NextToken();
486
487 // function
// An opening bracket directly after the name makes this a function call. Arguments are parsed
// with Start() and separated by commas.
// NOTE(review): as written, a closing bracket found right after '(' or right after a comma
// returns without calling parseSubscript(), whereas the regular end of the argument list
// (below) does call it - verify whether this difference is intended.
488 if( token == Tokens::BraceOpen )
489 {
490 ASTFunction* astFunction= compileTimeAllocator->Emplace<ASTFunction>( name, position, *compileTimeAllocator );
491 push( astFunction );
492 for(;;)
493 {
494 NextToken();
495 if( token == Tokens::BraceClose )
496 {
497 NextToken();
498 return pop();
499 }
500 astFunction->Arguments.EmplaceBack( Start() );
501
502 if( token == Tokens::Comma )
503 continue;
504
// Neither a comma nor a closing bracket follows the argument (source lines 507-508,
// construction of 'e', are missing).
505 if( token != Tokens::BraceClose )
506 {
509 throw e;
510 }
511
512 NextToken();
513 replace( parseSubscript( astFunction ) );
514 return pop();
515 }
516 }
517
518 // identifier
// NOTE(review): source line 519 is missing; presumably it creates the identifier node and pushes
// it (a pop() follows) - confirm.
520 return pop();
521 }
522
523 // ---------------------------------------- ERRORS -----------------------------------------
// Each remaining token type raises its own exception; the lines constructing 'e' (source lines
// 526-527, 533-534, 540-541 and 547-548) are missing from this listing.
524 if( token == Tokens::EOT )
525 {
528 throw e;
529 }
530
531 if( token == Tokens::BraceClose )
532 {
535 throw e;
536 }
537
538 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose )
539 {
542 throw e;
543 }
544
545 if( token == Tokens::Comma )
546 {
549 throw e;
550 }
551
552 ALIB_ERROR( "EXPR", "Internal parser Error. This should never happen")
553 return nullptr;
554}
555
// Parses an optional subscript "[ expr ]" that follows an already parsed node. The signature
// (source line 556) is missing from this listing; presumably this is parseSubscript(), and the
// node it receives is the parameter named 'function' - confirm.
// Returns 'function' unchanged if no subscript follows; otherwise returns a new binary operator
// node with symbol "[]", 'function' as its left-hand side and the subscript expression as its
// right-hand side.
557{
// NOTE(review): source line 558 (the start of this condition) is missing from this listing.
559 || token != Tokens::SubscriptOpen )
560 return function;
561
// Position of the opening '[' token.
562 integer position= tokPosition;
563
564 NextToken();
565
566 push( Start() );
567
// The closing ']' is mandatory (source lines 570-571, construction of 'e', are missing).
568 if( token != Tokens::SubscriptClose )
569 {
572 throw e;
573 }
574
575 // success
576 NextToken();
577 return compileTimeAllocator->Emplace<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
578}
579
580
581// #################################################################################################
582// Helpers
583// #################################################################################################
584
585
// Tries to read a unary operator from the current token. The signature (source line 586) is
// missing from this listing; presumably this is getUnaryOp() - confirm.
// Returns the operator symbol, or a null string if the current token is neither a symbolic nor
// an alphabetic unary operator token. A symbolic token of which no prefix is a known unary
// operator ends in a throw.
587{
588 if( token == Tokens::SymbolicOp )
589 {
590 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
591 // token only if all is consumed.
// Prefixes are tried from the shortest to the longest, so the shortest known operator wins.
592 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead )
593 {
594 Substring key= Substring( tokString.Buffer(), partialRead );
595 if( unaryOperators.Contains( key ) )
596 {
597 if( partialRead == tokString.Length() )
598 NextToken();
599 else
600 {
// Only a part of the token was used: the remainder stays the current token, with its string and
// position adjusted. (Source lines 601 and 605 are missing from this listing.)
602 tokString= String( tokString.Buffer() + partialRead,
603 tokString.Length() - partialRead );
604 tokPosition+= partialRead;
606 }
607 return key;
608 }
609 }
// No prefix matched. NOTE(review): source lines 610-611 (construction of 'e') are missing.
612 throw e;
613 }
614 else if ( token == Tokens::AlphaUnOp )
615 {
// Alphabetic operators are always consumed as a whole.
616 String alphabeticOperator= tokString;
617 NextToken();
618 return alphabeticOperator;
619 }
620
621 return NullString();
622}
623
// Tries to read a binary operator from the current token. The signature (source line 624) is
// missing from this listing; presumably this is getBinaryOp() - confirm.
// Returns the operator symbol, or a null string if the current token is not a binary operator
// token or is one of the ternary symbols "?" and ":". A symbolic token of which no prefix is a
// known binary operator ends in a throw.
625{
626 if ( token == Tokens::SymbolicOp )
627 {
628 // ignore ternary
629 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
630 return NullString();
631
632 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
633 // but are happy with less than available
// Prefixes are tried from the longest to the shortest, so the longest known operator wins.
634 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead )
635 {
636 Substring key = Substring( tokString.Buffer(), partialRead );
637 if ( binaryOperators.Contains( key ) )
638 {
639 if ( partialRead == tokString.Length() )
640 NextToken();
641 else
642 {
// Only a part of the token was used: the remainder stays the current token, with its string and
// position adjusted. (Source lines 643 and 647 are missing from this listing.)
644 tokString = String( tokString.Buffer() + partialRead,
645 tokString.Length() - partialRead );
646 tokPosition += partialRead;
648 }
649 return key;
650 }
651 }
652
// No prefix matched. NOTE(review): source lines 653-654 (construction of 'e') are missing.
655 throw e;
656 }
657 else if ( token == Tokens::AlphaBinOp )
658 {
// Alphabetic operators are always consumed as a whole.
659 String alphabeticOperator= tokString;
660 NextToken();
661 return alphabeticOperator;
662 }
663
664 return NullString();
665}
666
667
668#undef Start
669
670}}} // namespace [alib::expressions::detail]
HashMap< String, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > AlphabeticUnaryOperatorAliases
Definition compiler.hpp:154
List< String > UnaryOperators
Definition compiler.hpp:142
HashMap< String, int > BinaryOperators
Definition compiler.hpp:197
int GetBinaryOperatorPrecedence(const String &symbol)
Definition compiler.hpp:222
HashMap< String, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > AlphabeticBinaryOperatorAliases
Definition compiler.hpp:166
std::vector< AST *, StdContMA< AST * > > * ASTs
HashSet< String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
ParserImpl(Compiler &compiler, MonoAllocator *allocator)
HashSet< String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
virtual ALIB_API detail::AST * Parse(const String &exprString, NumberFormat *nf, MonoAllocator *allocator) override
const String & GetResource(const NString &name)
Exception & Add(const NCString &file, int line, const NCString &func, TEnum type, TArgs &&... args)
ALIB_FORCE_INLINE T * Emplace(TArgs &&... args)
strings::TString< TChar > EmplaceString(const strings::TString< TChar > &src)
constexpr bool IsNull() const
Definition string.hpp:395
integer IndexOf(TChar needle, integer startIdx=0) const
Definition string.hpp:889
constexpr bool IsEmpty() const
Definition string.hpp:414
constexpr bool IsNotEmpty() const
Definition string.hpp:420
constexpr integer Length() const
Definition string.hpp:357
TChar CharAtStart() const
Definition string.hpp:459
constexpr bool IsNotNull() const
Definition string.hpp:402
std::size_t HashcodeIgnoreCase() const
TString< TChar > Substring(integer regionStart, integer regionLength=MAX_LEN) const
Definition string.hpp:314
bool Equals(const TString< TChar > &rhs) const
Definition string.hpp:573
bool StartsWith(const TString &needle) const
Definition string.hpp:813
constexpr const TChar * Buffer() const
Definition string.hpp:350
ALIB_API bool ConsumeFloat(double &result, TNumberFormat< TChar > *numberFormat=nullptr)
integer ConsumeChars(integer regionLength, TSubstring *target=nullptr)
bool ConsumeInt(TIntegral &result, TNumberFormat< TChar > *numberFormat=nullptr)
TSubstring & TrimStart(const TCString< TChar > &whiteSpaces=TT_StringConstants< TChar >::DefaultWhitespaces())
Definition substring.hpp:89
#define ALIB_CALLER_NULLED
Definition alib.hpp:846
#define ALIB_CALLER
Definition alib.hpp:835
#define A_CHAR(STR)
#define ALIB_WARNINGS_RESTORE
Definition alib.hpp:715
#define ALIB_ERROR(...)
Definition alib.hpp:980
#define ALIB_ASSERT_ERROR(cond,...)
Definition alib.hpp:984
#define ALIB_WARNINGS_ALLOW_UNSAFE_BUFFER_USAGE
Definition alib.hpp:644
#define ALIB_DBG(...)
Definition alib.hpp:457
@ Off
Switch it off, switched off, etc.
Definition alib.cpp:57
lang::Exception Exception
Type alias in namespace alib.
expressions::Expressions EXPRESSIONS
strings::TSubstring< character > Substring
Type alias in namespace alib.
constexpr String NullString()
Definition string.hpp:2498
characters::character character
Type alias in namespace alib.
strings::TString< character > String
Type alias in namespace alib.
lang::integer integer
Type alias in namespace alib.
Definition integers.hpp:286
String Operator
The operator symbol.
Definition ast.hpp:291
AST * Lhs
The left-hand-side expression node.
Definition ast.hpp:292
List< AST * > Arguments
The argument nodes.
Definition ast.hpp:205
@ Scientific
Float was given in scientific format.
@ Hexadecimal
Integral value was given in hexadecimal format.
@ Binary
Integral value was given in binary format.
@ Octal
Integral value was given in octal format.
TCString< TChar > BinLiteralPrefix
TCString< TChar > OctLiteralPrefix
TCString< TChar > HexLiteralPrefix
TCString< TChar > ExponentSeparator