ALib C++ Library
Library Version: 2510 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1// #################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6// #################################################################################################
7#include "alib_precompile.hpp"
8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Symbol ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
10#endif
11#if ALIB_C20_MODULES
12 module;
13#endif
14// ====================================== Global Fragment ======================================
17// =========================================== Module ==========================================
18#if ALIB_C20_MODULES
20 import ALib.Expressions;
21#else
23#endif
24// ====================================== Implementation =======================================
25namespace alib { namespace expressions { namespace detail {
26
27// #################################################################################################
28// Parser
29// #################################################################################################
30
32: compileTimeAllocator(allocator)
33, compiler (pCompiler)
34, unaryOperators (allocator)
35, binaryOperators (allocator)
36{
37 // characters to be known
38 syntaxTokens [u8'(']= true;
39 syntaxTokens [u8')']= true;
40 syntaxTokens [u8',']= true;
41 operatorChars[u8'?']= true;
42 operatorChars[u8':']= true;
43
44 // define unary ops
45 for( auto& op : compiler.UnaryOperators )
46 {
47 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
48 "Doubly defined unary operator symbol '{}'.", op )
49
50 unaryOperators.EmplaceUnique(op);
51 for( auto it : op )
52 operatorChars[it]= true;
53 }
54
55 for( auto& op : compiler.AlphabeticUnaryOperatorAliases )
56 {
57 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
58 "Doubly defined unary operator symbol '{}'.", op.first )
59
60 unaryOperators.EmplaceUnique(op.first);
61 if( !isalpha( op.first.CharAtStart() ) )
62 for( auto it : op.first )
63 operatorChars[it]= true;
64 }
65
66
67 for( auto& op : compiler.BinaryOperators )
68 {
69 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
70 "Doubly defined binary operator symbol '{}'.", op.first )
71 if( op.first == A_CHAR("[]") )
72 {
73 syntaxTokens[u8'[']= true;
74 syntaxTokens[u8']']= true;
75 }
76 else
77 {
78 binaryOperators.EmplaceUnique(op.first);
79 for( auto it : op.first )
80 operatorChars[it]= true;
81 }
82 }
83
84 for( auto& op : compiler.AlphabeticBinaryOperatorAliases )
85 {
86 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
87 "Doubly defined binary operator symbol '{}'.", op.first )
88
89 ALIB_DBG( auto originalOp= )
90 compiler.BinaryOperators.Find( op.second );
91 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
92 "Alias '{}' defined for unknown operator '{}'.",
93 op.first, op.second )
94
95 binaryOperators.EmplaceUnique(op.first);
96 if( !isalpha( op.first.CharAtStart() ) )
97 for( auto it : op.first )
98 operatorChars[it]= true;
99 }
100}
101
102// #################################################################################################
103// Lexer
104// #################################################################################################
105void ParserImpl::NextToken()
106{
107 scanner.TrimStart();
108 tokPosition= expression.Length() - scanner.Length();
109
110 if( scanner.IsEmpty() )
111 {
112 token= Tokens::EOT;
113 return;
114 }
115
116 character first= scanner.CharAtStart<NC>();
117
118 //------------------------------ Syntax Tokens ------------------------------
119 if( syntaxTokens[first] )
120 {
121 token= Tokens(first);
122 scanner.ConsumeChar();
123 return;
124 }
125
126 //------------------------------ Symbolic operators ------------------------------
127 // read up to 3 operator characters
128 if( operatorChars[first] )
129 {
130 integer operatorLength= 1;
131 scanner.ConsumeChar();
132 if( operatorChars[scanner.CharAtStart() ] )
133 {
134 scanner.ConsumeChar();
135 ++operatorLength;
136
137 if( operatorChars[scanner.CharAtStart() ] )
138 {
139 scanner.ConsumeChar();
140 ++operatorLength;
141 }
142 }
143
144 token= Tokens::SymbolicOp;
145 tokString= String( expression.Buffer() + tokPosition, operatorLength );
146
147 // special treatment for Elvis with spaces "? :"
148 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) )
149 {
150 // patch existing token and return
151 Substring backup= scanner;
152 if( scanner.TrimStart().CharAtStart() == ':' )
153 {
154 tokString= A_CHAR("?:");
155 scanner.ConsumeChar();
156 }
157 else
158 scanner= backup;
159 }
160 return;
161 }
162
163 //------------------------------ alphabetic operators ------------------------------
164 if( isalpha( first ) )
165 {
166 integer len= 1;
167 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
168 ++len;
169 tokString= scanner.Substring<NC>( 0, len );
170 auto hashCode= tokString.HashcodeIgnoreCase();
171
172 // unary
173 {
174 decltype(unaryOperators)::Iterator it;
175 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
176 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
177 || tokString.Equals<NC>( it.Value() ) ) )
178 {
179 scanner.ConsumeChars<NC>( tokString.Length() );
180 token= Tokens::AlphaUnOp;
181 return;
182 }
183 }
184
185 // binary
186 {
187 decltype(binaryOperators)::Iterator it;
188 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
189 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
190 || tokString.Equals<NC>( it.Value() ) ) )
191 {
192 scanner.ConsumeChars<NC>( tokString.Length() );
193 token= Tokens::AlphaBinOp;
194 return;
195 }
196 }
197
198 }
199
200 //------------------------------ Identifiers ------------------------------
201 if( isalpha( first ) || first == '_' )
202 {
203 integer endOfIdent= 0;
204 character next= 0;
205 while( ++endOfIdent < scanner.Length()
206 && ( isalnum( next= scanner[endOfIdent] )
207 || next == '_' ) );
208
209 token= Tokens::Identifier;
210 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
211 scanner.ConsumeChars<NC>( endOfIdent );
212 return;
213 }
214
215 //------------------------------ numbers ------------------------------
216 if( isdigit( first ) )
217 {
218 integer endOfDecPart= 0;
219 character next= 0;
220 while( ++endOfDecPart < scanner.Length()
221 && ( isdigit( next= scanner[endOfDecPart] )
222 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
223 );
224
225
226 // float number
227 if( next == numberFormat->DecimalPointChar
228 || next == 'e'
229 || next == 'E'
230 || scanner.Substring( endOfDecPart ).StartsWith( numberFormat->ExponentSeparator ) )
231
232 {
233 auto oldStart= scanner.Buffer();
234 double value;
235 scanner.ConsumeFloat( value, numberFormat );
236 token = Tokens::LitFloat;
237 tokFloat= value;
238
239 String numberParsed( oldStart, scanner.Buffer() - oldStart );
240 tokLiteralHint= numberParsed.IndexOf('e') > 0
241 || numberParsed.IndexOf('E') > 0
242 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
243 ? ASTLiteral::NFHint::Scientific
244 : ASTLiteral::NFHint::NONE;
245 }
246
247 // integer number
248 else
249 {
250 tokLiteralHint= ASTLiteral::NFHint::NONE;
251 if( numberFormat->HexLiteralPrefix.IsNotEmpty()
252 && scanner.StartsWith( numberFormat->HexLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Hexadecimal;
253 else if( numberFormat->OctLiteralPrefix.IsNotEmpty()
254 && scanner.StartsWith( numberFormat->OctLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Octal;
255 else if( numberFormat->BinLiteralPrefix.IsNotEmpty()
256 && scanner.StartsWith( numberFormat->BinLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Binary;
257
258 integer value;
259 scanner.ConsumeInt( value, numberFormat );
260 token= Tokens::LitInteger;
261 tokInteger= value;
262 }
263
264 return;
265 }
266
267 //------------------------------ Strings ------------------------------
268 if( first == '"' )
269 {
270 bool lastWasSlash= false;
271 scanner.ConsumeChar<NC>();
272 character next;
273 while( (next= scanner.ConsumeChar()) != '\0' )
274 {
275 if( next == '\\' ) { lastWasSlash= true; continue; }
276 if( next == '\"' && !lastWasSlash ) break;
277 lastWasSlash= false;
278 }
279
280 if( next != '"' )
281 {
282 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation,
283 EXPRESSIONS.GetResource("EE4") );
284 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo,
285 expression, expression.Length() - scanner.Length() );
286 throw e;
287 }
288
289 String quoted( expression.Buffer() + tokPosition + 1,
290 expression.Length() - scanner.Length() - tokPosition -2 );
291 token = Tokens::LitString;
292 tokString.Allocate(compileTimeAllocator, String1K(quoted) << Escape( lang::Switch::Off ) );
293 return;
294 }
295
296 // -------- unrecognized token ---------
297 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxError );
298 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, expression.Length() - scanner.Length() );
299 throw e;
300}
301
302
303
304// #################################################################################################
305// Parser
306// #################################################################################################
307#define Start parseConditional
308
309detail::AST* ParserImpl::Parse( const String& exprString, NumberFormat* nf )
310{
311 if( exprString.IsEmpty() )
312 throw Exception( ALIB_CALLER, Exceptions::EmptyExpressionString );
313
314 expression = exprString;
315 numberFormat= nf;
316 ASTs = compileTimeAllocator().New<StdVectorMono<AST*>>( compileTimeAllocator );
317 ASTs->reserve(20);
318
319 // load first token
320 scanner= expression;
321 NextToken();
322
323//ALIB_DBG( lexer.DbgListTokens(); )
324
325 AST* ast= Start();
326
327
328 // if tokens remain, an "operator" would be expected
329 if( token != Tokens::EOT )
330 {
331 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE5") );
332 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
333 throw e;
334 }
335
336 return ast;
337}
338
339
340AST* ParserImpl::parseConditional()
341{
342 // parse lhs as simple
343 push( parseBinary() ); // Q
344
345 integer qmPosition= tokPosition;
346
347
348 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") )
349 {
350 NextToken();
351 push( Start() ); // T
352
353 // expect colon
354 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") )
355 {
356 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE6") );
357 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
358 throw e;
359 }
360 integer colonPosition= tokPosition;
361
362 NextToken();
363
364 AST* F= Start();
365 AST* T= pop();
366 AST* Q= pop();
367 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
368 }
369
370 // was no conditional
371 return pop();
372}
373
374AST* ParserImpl::parseBinary()
375{
376 // parse lhs as simple
377 push( parseSimple() );
378
379 // parse
380 integer position= tokPosition;
381 String binOp;
382 for( ;; )
383 {
384 binOp= getBinaryOp();
385 if( binOp.IsNull() )
386 return pop();
387
388 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
389 if( token == Tokens::BraceOpen )
390 {
391 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
392 position= tokPosition;
393 continue;
394 }
395 break;
396 }
397
398 // check if tokens remain
399 if( token == Tokens::EOT )
400 {
401 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE7") );
402 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
403 throw e;
404 }
405
406 AST* lhs= top();
407 AST* rhs= push( parseBinary() );
408
409 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
410 AST* replace = rhs;
411 ASTBinaryOp* parent = nullptr;
412 while( replace->NodeType == AST::Types::BinaryOp
413 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
414 {
415 parent = dynamic_cast<ASTBinaryOp*>(replace);
416 replace= parent->Lhs;
417 }
418
419 pop();
420 pop();
421 if( parent == nullptr )
422 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
423
424 // insert binary at lhs of deepest equal-level binary found.
425 // Its current lhs becomes its new lhs-child's rhs.
426 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
427 return rhs;
428}
429
431{
432 // '(' expr ')' (brackets)
433 if( token == Tokens::BraceOpen )
434 {
435 NextToken();
436 push( Start() );
437
438 if( token != Tokens::BraceClose )
439 {
440 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE1"));
441 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
442 throw e;
443 }
444 NextToken();
445 replace( parseSubscript( top() ) );
446 return pop();
447 }
448
449 // unary operator
450 integer position= tokPosition;
451 {
452 String unOp= getUnaryOp();
453 if( unOp.IsNotNull() )
454 {
455 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
456 replace( parseSubscript( top() ) );
457 return pop();
458 }
459 }
460
461 // terminals
462 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
463 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
464 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
465 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) // allow bin op's names here! This is tricky but right!
466 {
467 String name= tokString;
468 NextToken();
469
470 // function
471 if( token == Tokens::BraceOpen )
472 {
473 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
474 push( astFunction );
475 for(;;)
476 {
477 NextToken();
478 if( token == Tokens::BraceClose )
479 {
480 NextToken();
481 return pop();
482 }
483 astFunction->Arguments.emplace_back( Start() );
484
485 if( token == Tokens::Comma )
486 continue;
487
488 if( token != Tokens::BraceClose )
489 {
491 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
492 throw e;
493 }
494
495 NextToken();
496 replace( parseSubscript( astFunction ) );
497 return pop();
498 }
499 }
500
501 // identifier
502 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>( String(compileTimeAllocator, name), position ) ) ) );
503 return pop();
504 }
505
506 // ---------------------------------------- ERRORS -----------------------------------------
507 if( token == Tokens::EOT )
508 {
510 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
511 throw e;
512 }
513
514 if( token == Tokens::BraceClose )
515 {
517 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
518 throw e;
519 }
520
521 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose )
522 {
524 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
525 throw e;
526 }
527
528 if( token == Tokens::Comma )
529 {
531 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
532 throw e;
533 }
534
535 ALIB_ERROR( "EXPR", "Internal error. This should never happen." )
536 return nullptr;
537}
538
540{
541 if( !HasBits( compiler.CfgCompilation, Compilation::AllowSubscriptOperator )
543 return function;
544
545 integer position= tokPosition;
546
547 NextToken();
548
549 push( Start() );
550
552 {
555 throw e;
556 }
557
558 // success
559 NextToken();
560 return compileTimeAllocator().New<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
561}
562
563
564// #################################################################################################
565// Helpers
566// #################################################################################################
567
568
570{
572 {
573 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
574 // token only if all is consumed.
575 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead )
576 {
577 Substring key= Substring( tokString.Buffer(), partialRead );
578 if( unaryOperators.Contains( key ) )
579 {
580 if( partialRead == tokString.Length() )
581 NextToken();
582 else
583 {
584 tokString= String( tokString.Buffer() + partialRead,
585 tokString.Length() - partialRead );
586 tokPosition+= partialRead;
587 }
588 return key;
589 }
590 }
593 throw e;
594 }
595 else if ( token == Tokens::AlphaUnOp )
596 {
597 String alphabeticOperator= tokString;
598 NextToken();
599 return alphabeticOperator;
600 }
601
602 return NULL_STRING;
603}
604
606{
607 if ( token == Tokens::SymbolicOp )
608 {
609 // ignore ternary
610 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
611 return NULL_STRING;
612
613 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
614 // but are happy with less than available
615 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead )
616 {
617 Substring key = Substring( tokString.Buffer(), partialRead );
618 if ( binaryOperators.Contains( key ) )
619 {
620 if ( partialRead == tokString.Length() )
621 NextToken();
622 else
623 {
624 tokString = String( tokString.Buffer() + partialRead,
625 tokString.Length() - partialRead );
626 tokPosition += partialRead;
627 }
628 return key;
629 }
630 }
631
634 throw e;
635 }
636 else if ( token == Tokens::AlphaBinOp )
637 {
638 String alphabeticOperator= tokString;
639 NextToken();
640 return alphabeticOperator;
641 }
642
643 return NULL_STRING;
644}
645
646
647#undef Start
648
649}}} // namespace [alib::expressions::detail]
const String & GetResource(const NString &name)
Definition camp.inl:265
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
String expression
The given expression to parse.
integer tokPosition
The position of the token in expression.
String tokString
String value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
@ SubscriptClose
A closing subscript brace.
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
#define ALIB_CALLER_NULLED
Definition alib.inl:1010
#define A_CHAR(STR)
#define ALIB_ERROR(domain,...)
Definition alib.inl:1045
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
constexpr String NULL_STRING
A nulled string of the default character type.
Definition string.inl:2463
lang::integer integer
Type alias in namespace alib.
Definition integers.inl:149
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
exceptions::Exception Exception
Type alias in namespace alib.
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2381
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of ALib Camp class ExpressionsCamp.
strings::TSubstring< character > Substring
Type alias in namespace alib.
Abstract syntax tree node representing binary operators.
Definition ast_impl.inl:211