ALib C++ Library
Library Version: 2511 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1//##################################################################################################
2// ALib C++ Library
3//
4// Copyright 2013-2025 A-Worx GmbH, Germany
5// Published under 'Boost Software License' (a free software license, see LICENSE.txt)
6//##################################################################################################
7#include "alib_precompile.hpp"
8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Symbol ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
10#endif
11#if ALIB_C20_MODULES
12 module;
13#endif
14//========================================= Global Fragment ========================================
17//============================================== Module ============================================
18#if ALIB_C20_MODULES
19 module ALib.Expressions.Impl;
20 import ALib.Expressions;
21#else
23#endif
24//========================================== Implementation ========================================
25namespace alib { namespace expressions { namespace detail {
26
27//##################################################################################################
28// Parser
29//##################################################################################################
30
// Member initializer list and body of a constructor that prepares the lexer's lookup tables.
// NOTE(review): the signature line preceding the initializer list is not part of this listing;
// the parameters used below are 'allocator' and 'pCompiler'.
//
// The body
//  - flags the characters '(' , ')' and ',' in table 'syntaxTokens' and the characters '?' and ':'
//    in table 'operatorChars',
//  - copies the unary and binary operator symbols (and their aliases) found in 'compiler' into
//    the sets 'unaryOperators' and 'binaryOperators', and
//  - flags every character of such a symbol in 'operatorChars'. For aliases, this is done only if
//    the alias does not start with a letter.
32: compileTimeAllocator(allocator)
33, compiler (pCompiler)
34, unaryOperators (allocator)
35, binaryOperators (allocator) {
36 // characters to be known
37 syntaxTokens [u8'(']= true;
38 syntaxTokens [u8')']= true;
39 syntaxTokens [u8',']= true;
40 operatorChars[u8'?']= true;
41 operatorChars[u8':']= true;
42
43 // define unary ops
// Each symbol is asserted to be new, then stored and all of its characters are flagged as
// operator characters.
44 for( auto& op : compiler.UnaryOperators ) {
45 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
46 "Doubly defined unary operator symbol '{}'.", op )
47
48 unaryOperators.EmplaceUnique(op);
49 for( auto it : op )
50 operatorChars[it]= true;
51 }
52
// Aliases of unary operators go into the same set as the operators themselves. Only the key
// (op.first) is used here. Characters are flagged only for aliases not starting with a letter.
53 for( auto& op : compiler.AlphabeticUnaryOperatorAliases ) {
54 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
55 "Doubly defined unary operator symbol '{}'.", op.first )
56
57 unaryOperators.EmplaceUnique(op.first);
58 if( !isalpha( op.first.CharAtStart() ) )
59 for( auto it : op.first )
60 operatorChars[it]= true;
61 }
62
63
// Binary operators. The symbol "[]" is special: it is not added to 'binaryOperators'. Instead,
// '[' and ']' are flagged as syntax tokens.
64 for( auto& op : compiler.BinaryOperators ) {
65 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
66 "Doubly defined binary operator symbol '{}'.", op.first )
67 if( op.first == A_CHAR("[]") ) {
68 syntaxTokens[u8'[']= true;
69 syntaxTokens[u8']']= true;
70 } else {
71 binaryOperators.EmplaceUnique(op.first);
72 for( auto it : op.first )
73 operatorChars[it]= true;
74 } }
75
// Aliases of binary operators: op.first is the alias, op.second the aliased operator symbol.
76 for( auto& op : compiler.AlphabeticBinaryOperatorAliases ) {
77 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
78 "Doubly defined binary operator symbol '{}'.", op.first )
79
// The result of Find() is only used by the assertion that follows.
// NOTE(review): presumably ALIB_DBG drops its argument in release builds, which would leave a
// bare Find() call with unused result - confirm against the macro's definition.
80 ALIB_DBG( auto originalOp= )
81 compiler.BinaryOperators.Find( op.second );
82 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
83 "Alias '{}' defined for unknown operator '{}'.",
84 op.first, op.second )
85
86 binaryOperators.EmplaceUnique(op.first);
87 if( !isalpha( op.first.CharAtStart() ) )
88 for( auto it : op.first )
89 operatorChars[it]= true;
90} }
91
92//##################################################################################################
93// Lexer
94//##################################################################################################
95void ParserImpl::NextToken() {
96 scanner.TrimStart();
97 tokPosition= expression.Length() - scanner.Length();
98
99 if( scanner.IsEmpty() ) {
100 token= Tokens::EOT;
101 return;
102 }
103
104 character first= scanner.CharAtStart<NC>();
105
106 //----------------------------------------- Syntax Tokens ----------------------------------------
107 if( syntaxTokens[first] ) {
108 token= Tokens(first);
109 scanner.ConsumeChar();
110 return;
111 }
112
113 //--------------------------------------- Symbolic operators -------------------------------------
114 // read up to 3 operator characters
115 if( operatorChars[first] ) {
116 integer operatorLength= 1;
117 scanner.ConsumeChar();
118 if( operatorChars[scanner.CharAtStart() ] ) {
119 scanner.ConsumeChar();
120 ++operatorLength;
121
122 if( operatorChars[scanner.CharAtStart() ] ) {
123 scanner.ConsumeChar();
124 ++operatorLength;
125 } }
126
127 token= Tokens::SymbolicOp;
128 tokString= String( expression.Buffer() + tokPosition, operatorLength );
129
130 // special treatment for Elvis with spaces "? :"
131 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) ) {
132 // patch existing token and return
133 Substring backup= scanner;
134 if( scanner.TrimStart().CharAtStart() == ':' ) {
135 tokString= A_CHAR("?:");
136 scanner.ConsumeChar();
137 }
138 else
139 scanner= backup;
140 }
141 return;
142 }
143
144 //-------------------------------------- alphabetic operators ------------------------------------
145 if( isalpha( first ) ) {
146 integer len= 1;
147 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
148 ++len;
149 tokString= scanner.Substring<NC>( 0, len );
150 auto hashCode= tokString.HashcodeIgnoreCase();
151
152 // unary
153 {
154 decltype(unaryOperators)::Iterator it;
155 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
156 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
157 || tokString.Equals<NC>( it.Value() ) ) )
158 {
159 scanner.ConsumeChars<NC>( tokString.Length() );
160 token= Tokens::AlphaUnOp;
161 return;
162 } }
163
164 // binary
165 {
166 decltype(binaryOperators)::Iterator it;
167 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
168 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
169 || tokString.Equals<NC>( it.Value() ) ) )
170 {
171 scanner.ConsumeChars<NC>( tokString.Length() );
172 token= Tokens::AlphaBinOp;
173 return;
174 } }
175
176 }
177
178 //------------------------------------------ Identifiers -----------------------------------------
179 if( isalpha( first ) || first == '_' ) {
180 integer endOfIdent= 0;
181 character next= 0;
182 while( ++endOfIdent < scanner.Length()
183 && ( isalnum( next= scanner[endOfIdent] )
184 || next == '_' ) );
185
186 token= Tokens::Identifier;
187 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
188 scanner.ConsumeChars<NC>( endOfIdent );
189 return;
190 }
191
192 //-------------------------------------------- numbers -------------------------------------------
193 if( isdigit( first ) ) {
194 integer endOfDecPart= 0;
195 character next= 0;
196 while( ++endOfDecPart < scanner.Length()
197 && ( isdigit( next= scanner[endOfDecPart] )
198 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
199 );
200
201
202 // float number
203 if( next == numberFormat->DecimalPointChar
204 || next == 'e'
205 || next == 'E'
206 || scanner.Substring( endOfDecPart ).StartsWith( numberFormat->ExponentSeparator ) )
207
208 {
209 auto oldStart= scanner.Buffer();
210 double value;
211 scanner.ConsumeFloat( value, numberFormat );
212 token = Tokens::LitFloat;
213 tokFloat= value;
214
215 String numberParsed( oldStart, scanner.Buffer() - oldStart );
216 tokLiteralHint= numberParsed.IndexOf('e') > 0
217 || numberParsed.IndexOf('E') > 0
218 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
219 ? ASTLiteral::NFHint::Scientific
220 : ASTLiteral::NFHint::NONE;
221 }
222
223 // integer number
224 else {
225 tokLiteralHint= ASTLiteral::NFHint::NONE;
226 if( numberFormat->HexLiteralPrefix.IsNotEmpty()
227 && scanner.StartsWith( numberFormat->HexLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Hexadecimal;
228 else if( numberFormat->OctLiteralPrefix.IsNotEmpty()
229 && scanner.StartsWith( numberFormat->OctLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Octal;
230 else if( numberFormat->BinLiteralPrefix.IsNotEmpty()
231 && scanner.StartsWith( numberFormat->BinLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Binary;
232
233 integer value;
234 scanner.ConsumeInt( value, numberFormat );
235 token= Tokens::LitInteger;
236 tokInteger= value;
237 }
238
239 return;
240 }
241
242 //-------------------------------------------- Strings -------------------------------------------
243 if( first == '"' ) {
244 bool lastWasSlash= false;
245 scanner.ConsumeChar<NC>();
246 character next;
247 while( (next= scanner.ConsumeChar()) != '\0' ) {
248 if( next == '\\' ) { lastWasSlash= true; continue; }
249 if( next == '\"' && !lastWasSlash ) break;
250 lastWasSlash= false;
251 }
252
253 if( next != '"' ) {
254 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation,
255 EXPRESSIONS.GetResource("EE4") );
256 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo,
257 expression, expression.Length() - scanner.Length() );
258 throw e;
259 }
260
261 String quoted( expression.Buffer() + tokPosition + 1,
262 expression.Length() - scanner.Length() - tokPosition -2 );
263 token = Tokens::LitString;
264 tokString.Allocate(compileTimeAllocator, String1K(quoted) << Escape( lang::Switch::Off ) );
265 return;
266 }
267
268 //--------------------------------------- unrecognized token -------------------------------------
269 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxError );
270 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, expression.Length() - scanner.Length() );
271 throw e;
272}
273
274
275
276//##################################################################################################
277// Parser
278//##################################################################################################
279#define Start parseConditional
280
281detail::AST* ParserImpl::Parse( const String& exprString, NumberFormat* nf ) {
282 if( exprString.IsEmpty() )
283 throw Exception( ALIB_CALLER, Exceptions::EmptyExpressionString );
284
285 expression = exprString;
286 numberFormat= nf;
287 ASTs = compileTimeAllocator().New<StdVectorMA<AST*>>( compileTimeAllocator );
288 ASTs->reserve(20);
289
290 // load first token
291 scanner= expression;
292 NextToken();
293
294//ALIB_DBG( lexer.DbgListTokens(); )
295
296 AST* ast= Start();
297
298
299 // if tokens remain, an "operator" would be expected
300 if( token != Tokens::EOT ) {
301 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE5") );
302 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
303 throw e;
304 }
305
306 return ast;
307}
308
309
310AST* ParserImpl::parseConditional() {
311 // parse lhs as simple
312 push( parseBinary() ); // Q
313
314 integer qmPosition= tokPosition;
315
316
317 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") ) {
318 NextToken();
319 push( Start() ); // T
320
321 // expect colon
322 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") ) {
323 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE6") );
324 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
325 throw e;
326 }
327 integer colonPosition= tokPosition;
328
329 NextToken();
330
331 AST* F= Start();
332 AST* T= pop();
333 AST* Q= pop();
334 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
335 }
336
337 // was no conditional
338 return pop();
339}
340
341AST* ParserImpl::parseBinary() {
342 // parse lhs as simple
343 push( parseSimple() );
344
345 // parse
346 integer position= tokPosition;
347 String binOp;
348 for( ;; ) {
349 binOp= getBinaryOp();
350 if( binOp.IsNull() )
351 return pop();
352
353 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
354 if( token == Tokens::BraceOpen ) {
355 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
356 position= tokPosition;
357 continue;
358 }
359 break;
360 }
361
362 // check if tokens remain
363 if( token == Tokens::EOT ) {
364 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE7") );
365 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
366 throw e;
367 }
368
369 AST* lhs= top();
370 AST* rhs= push( parseBinary() );
371
372 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
373 AST* replace = rhs;
374 ASTBinaryOp* parent = nullptr;
375 while( replace->NodeType == AST::Types::BinaryOp
376 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
377 {
378 parent = dynamic_cast<ASTBinaryOp*>(replace);
379 replace= parent->Lhs;
380 }
381
382 pop();
383 pop();
384 if( parent == nullptr )
385 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
386
387 // insert binary at lhs of deepest equal-level binary found.
388 // Its current lhs becomes its new lhs-child's rhs.
389 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
390 return rhs;
391}
392
// Body of the method that parses a "simple" expression: a braced expression, a unary operation,
// a literal, a function call or an identifier. Each successfully parsed node is passed through
// parseSubscript() before it is returned (with one exception, noted at the function-call loop).
// NOTE(review): the signature line of this definition and the lines declaring the local exception
// object 'e' in several error branches are not part of this listing.
394 // '(' expr ')' (brackets)
395 if( token == Tokens::BraceOpen ) {
396 NextToken();
397 push( Start() );
398
399 if( token != Tokens::BraceClose ) {
400 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE1"));
401 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
402 throw e;
403 }
404 NextToken();
405 replace( parseSubscript( top() ) );
406 return pop();
407 }
408
409 // unary operator
// The position is captured before getUnaryOp() is called, because that call may advance the token.
410 integer position= tokPosition;
411 {
412 String unOp= getUnaryOp();
413 if( unOp.IsNotNull() ) {
// The operand is parsed by a recursive call.
414 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
415 replace( parseSubscript( top() ) );
416 return pop();
417 } }
418
419 // terminals
// Each literal node is pushed, the next token is read, and an optional subscript is parsed.
420 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
421 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
422 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
423
424 // allow bin op's names here! This is tricky but right!
425 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) {
426 String name= tokString;
427 NextToken();
428
429 // function
// An opening brace directly after the name makes this a function call.
430 if( token == Tokens::BraceOpen ) {
431 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
432 push( astFunction );
433 for(;;) {
434 NextToken();
// NOTE(review): this exit is taken for an empty argument list and after a trailing comma. Unlike
// the exit at the end of the loop, it returns without calling parseSubscript() - confirm whether
// this is intended.
435 if( token == Tokens::BraceClose ) {
436 NextToken();
437 return pop();
438 }
439 astFunction->Arguments.emplace_back( Start() );
440
441 if( token == Tokens::Comma )
442 continue;
443
444 if( token != Tokens::BraceClose ) {
446 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
447 throw e;
448 }
449
450 NextToken();
451 replace( parseSubscript( astFunction ) );
452 return pop();
453 } }
454
455 // identifier
456 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>( String(compileTimeAllocator, name), position ) ) ) );
457 return pop();
458 }
459
460 //--------------------------------------------- ERRORS -------------------------------------------
// No rule matched. One branch per token type follows; each adds the expression and the token
// position to an exception object and throws it.
461 if( token == Tokens::EOT ) {
463 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
464 throw e;
465 }
466
467 if( token == Tokens::BraceClose ) {
469 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
470 throw e;
471 }
472
473 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose ) {
475 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
476 throw e;
477 }
478
479 if( token == Tokens::Comma ) {
481 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
482 throw e;
483 }
484
// Reached only if the token type is handled by none of the branches above.
485 ALIB_ERROR( "EXPR", "Internal error. This should never happen." )
486 return nullptr;
487}
488
// Body of the method that parses an optional subscript following the given node 'function'.
// On success, a binary operator node with operator "[]" is returned, which has 'function' as its
// left-hand side and the parsed subscript expression as its right-hand side.
// NOTE(review): the signature line, the second half of the first condition and the check that
// precedes 'throw e' are not part of this listing.
490 if( !HasBits( compiler.CfgCompilation, Compilation::AllowSubscriptOperator )
// no subscript is parsed: the given node is returned unchanged
492 return function;
493
// remember the position of the current token, then read the token that follows it
494 integer position= tokPosition;
495
496 NextToken();
497
// the subscript expression is kept on the AST stack until the result node is created
498 push( Start() );
499
503 throw e;
504 }
505
506 // success
507 NextToken();
508 return compileTimeAllocator().New<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
509}
510
511
512//##################################################################################################
513// Helpers
514//##################################################################################################
515
516
// Body of the helper that tries to read a unary operator from the current token.
// Returns the operator string, or NULL_STRING if the current token is neither a symbolic operator
// nor an alphabetic unary operator.
// NOTE(review): the signature line and the declaration of the exception object 'e' thrown below
// are not part of this listing.
518 if( token == Tokens::SymbolicOp ) {
519 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
520 // token only if all is consumed.
// Prefixes of the token string are tested in increasing length, hence the shortest match wins.
521 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead ) {
522 Substring key= Substring( tokString.Buffer(), partialRead );
523 if( unaryOperators.Contains( key ) ) {
// full match: proceed to the next token
524 if( partialRead == tokString.Length() )
525 NextToken();
// partial match: the remainder of the string stays the current token and its position is advanced
526 else {
527 tokString= String( tokString.Buffer() + partialRead,
528 tokString.Length() - partialRead );
529 tokPosition+= partialRead;
530 }
531 return key;
532 } }
// no prefix of the symbolic token is a known unary operator
535 throw e;
536 }
// alphabetic operators were already identified by the lexer and are consumed as a whole
537 else if ( token == Tokens::AlphaUnOp ) {
538 String alphabeticOperator= tokString;
539 NextToken();
540 return alphabeticOperator;
541 }
542
543 return NULL_STRING;
544}
545
// Body of the helper that tries to read a binary operator from the current token.
// Returns the operator string, or NULL_STRING if the current token is "?" or ":" or is neither a
// symbolic operator nor an alphabetic binary operator.
// NOTE(review): the signature line and the declaration of the exception object 'e' thrown below
// are not part of this listing.
547 if ( token == Tokens::SymbolicOp ) {
548 // ignore ternary
549 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
550 return NULL_STRING;
551
552 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
553 // but are happy with less than available
// Prefixes of the token string are tested in decreasing length, hence the longest match wins.
554 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead ) {
555 Substring key = Substring( tokString.Buffer(), partialRead );
556 if ( binaryOperators.Contains( key ) ) {
// full match: proceed to the next token
557 if ( partialRead == tokString.Length() )
558 NextToken();
// partial match: the remainder of the string stays the current token and its position is advanced
559 else {
560 tokString = String( tokString.Buffer() + partialRead,
561 tokString.Length() - partialRead );
562 tokPosition += partialRead;
563 }
564 return key;
565 } }
566
// no prefix of the symbolic token is a known binary operator
569 throw e;
570 }
// alphabetic operators were already identified by the lexer and are consumed as a whole
571 else if ( token == Tokens::AlphaBinOp ) {
572 String alphabeticOperator= tokString;
573 NextToken();
574 return alphabeticOperator;
575 }
576
577 return NULL_STRING;
578}
579
580
581#undef Start
582
583}}} // namespace [alib::expressions::detail]
const String & GetResource(const NString &name)
Definition camp.inl:260
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
String expression
The given expression to parse.
integer tokPosition
The position of the token in expression.
String tokString
String value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
@ SubscriptClose
A closing subscript brace.
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
#define ALIB_CALLER_NULLED
Definition alib.inl:1027
#define A_CHAR(STR)
#define ALIB_ERROR(domain,...)
Definition alib.inl:1062
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
constexpr String NULL_STRING
A nulled string of the default character type.
Definition string.inl:2271
lang::integer integer
Type alias in namespace alib.
Definition integers.inl:149
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
exceptions::Exception Exception
Type alias in namespace alib.
strings::TString< character > String
Type alias in namespace alib.
Definition string.inl:2189
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of ALib Camp class ExpressionsCamp.
strings::TSubstring< character > Substring
Type alias in namespace alib.
Abstract syntax tree node representing binary operators.
Definition ast_impl.inl:197