ALib C++ Framework
by
Library Version: 2605 R0
Documentation generated by doxygen
Loading...
Searching...
No Matches
parser_impl.cpp
1namespace alib { namespace expressions { namespace detail {
2
3//##################################################################################################
4// Parser
5//##################################################################################################
6
8: compileTimeAllocator(allocator)
9, compiler (pCompiler)
10, unaryOperators (allocator)
11, binaryOperators (allocator) {
12 // characters to be known
13 syntaxTokens [u8'(']= true;
14 syntaxTokens [u8')']= true;
15 syntaxTokens [u8',']= true;
16 operatorChars[u8'?']= true;
17 operatorChars[u8':']= true;
18
19 // define unary ops
20 for( auto& op : compiler.UnaryOperators ) {
21 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op), "EXPR",
22 "Doubly defined unary operator symbol '{}'.", op )
23
24 unaryOperators.EmplaceUnique(op);
25 for( auto it : op )
26 operatorChars[it]= true;
27 }
28
29 for( auto& op : compiler.AlphabeticUnaryOperatorAliases ) {
30 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first), "EXPR",
31 "Doubly defined unary operator symbol '{}'.", op.first )
32
33 unaryOperators.EmplaceUnique(op.first);
34 if( !isalpha( op.first.CharAtStart() ) )
35 for( auto it : op.first )
36 operatorChars[it]= true;
37 }
38
39
40 for( auto& op : compiler.BinaryOperators ) {
41 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
42 "Doubly defined binary operator symbol '{}'.", op.first )
43 if( op.first == A_CHAR("[]") ) {
44 syntaxTokens[u8'[']= true;
45 syntaxTokens[u8']']= true;
46 } else {
47 binaryOperators.EmplaceUnique(op.first);
48 for( auto it : op.first )
49 operatorChars[it]= true;
50 } }
51
52 for( auto& op : compiler.AlphabeticBinaryOperatorAliases ) {
53 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
54 "Doubly defined binary operator symbol '{}'.", op.first )
55
56 ALIB_DBG( auto originalOp= )
57 compiler.BinaryOperators.Find( op.second );
58 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
59 "Alias '{}' defined for unknown operator '{}'.",
60 op.first, op.second )
61
62 binaryOperators.EmplaceUnique(op.first);
63 if( !isalpha( op.first.CharAtStart() ) )
64 for( auto it : op.first )
65 operatorChars[it]= true;
66} }
67
68//##################################################################################################
69// Lexer
70//##################################################################################################
71void ParserImpl::NextToken() {
72 scanner.TrimStart();
73 tokPosition= expression.Length() - scanner.Length();
74
75 if( scanner.IsEmpty() ) {
76 token= Tokens::EOT;
77 return;
78 }
79
80 character first= scanner.CharAtStart<NC>();
81
82 //----------------------------------------- Syntax Tokens ----------------------------------------
83 if( syntaxTokens[first] ) {
84 token= Tokens(first);
85 scanner.ConsumeChar();
86 return;
87 }
88
89 //--------------------------------------- Symbolic operators -------------------------------------
90 // read up to 3 operator characters
91 if( operatorChars[first] ) {
92 integer operatorLength= 1;
93 scanner.ConsumeChar();
94 if( operatorChars[scanner.CharAtStart() ] ) {
95 scanner.ConsumeChar();
96 ++operatorLength;
97
98 if( operatorChars[scanner.CharAtStart() ] ) {
99 scanner.ConsumeChar();
100 ++operatorLength;
101 } }
102
103 token= Tokens::SymbolicOp;
104 tokString= String( expression.Buffer() + tokPosition, operatorLength );
105
106 // special treatment for Elvis with spaces "? :"
107 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) ) {
108 // patch existing token and return
109 Substring backup= scanner;
110 if( scanner.TrimStart().CharAtStart() == ':' ) {
111 tokString= A_CHAR("?:");
112 scanner.ConsumeChar();
113 }
114 else
115 scanner= backup;
116 }
117 return;
118 }
119
120 //-------------------------------------- alphabetic operators ------------------------------------
121 if( isalpha( first ) ) {
122 integer len= 1;
123 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_' ) )
124 ++len;
125 tokString= scanner.Substring<NC>( 0, len );
126 auto hashCode= tokString.HashcodeIgnoreCase();
127
128 // unary
129 {
130 decltype(unaryOperators)::Iterator it;
131 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
132 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
133 || tokString.Equals<NC>( it.Value() ) ) )
134 {
135 scanner.ConsumeChars<NC>( tokString.Length() );
136 token= Tokens::AlphaUnOp;
137 return;
138 } }
139
140 // binary
141 {
142 decltype(binaryOperators)::Iterator it;
143 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
144 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
145 || tokString.Equals<NC>( it.Value() ) ) )
146 {
147 scanner.ConsumeChars<NC>( tokString.Length() );
148 token= Tokens::AlphaBinOp;
149 return;
150 } }
151
152 }
153
154 //------------------------------------------ Identifiers -----------------------------------------
155 if( isalpha( first ) || first == '_' ) {
156 integer endOfIdent= 0;
157 character next= 0;
158 while( ++endOfIdent < scanner.Length()
159 && ( isalnum( next= scanner[endOfIdent] )
160 || next == '_' ) );
161
162 token= Tokens::Identifier;
163 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
164 scanner.ConsumeChars<NC>( endOfIdent );
165 return;
166 }
167
168 //-------------------------------------------- numbers -------------------------------------------
169 if( isdigit( first ) ) {
170 integer endOfDecPart= 0;
171 character next= 0;
172 while( ++endOfDecPart < scanner.Length()
173 && ( isdigit( next= scanner[endOfDecPart] )
174 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
175 );
176
177
178 // float number
179 if( next == numberFormat->DecimalPointChar
180 || next == 'e'
181 || next == 'E'
182 || scanner.Substring( endOfDecPart ).StartsWith( numberFormat->ExponentSeparator ) )
183
184 {
185 auto oldStart= scanner.Buffer();
186 double value;
187 scanner.ConsumeFloat( value, numberFormat );
188 token = Tokens::LitFloat;
189 tokFloat= value;
190
191 String numberParsed( oldStart, scanner.Buffer() - oldStart );
192 tokLiteralHint= numberParsed.IndexOf('e') > 0
193 || numberParsed.IndexOf('E') > 0
194 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
195 ? ASTLiteral::NFHint::Scientific
196 : ASTLiteral::NFHint::NONE;
197 }
198
199 // integer number
200 else {
201 tokLiteralHint= ASTLiteral::NFHint::NONE;
202 if( numberFormat->HexLiteralPrefix.IsNotEmpty()
203 && scanner.StartsWith( numberFormat->HexLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Hexadecimal;
204 else if( numberFormat->OctLiteralPrefix.IsNotEmpty()
205 && scanner.StartsWith( numberFormat->OctLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Octal;
206 else if( numberFormat->BinLiteralPrefix.IsNotEmpty()
207 && scanner.StartsWith( numberFormat->BinLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Binary;
208
209 integer value;
210 scanner.ConsumeInt( value, numberFormat );
211 token= Tokens::LitInteger;
212 tokInteger= value;
213 }
214
215 return;
216 }
217
218 //-------------------------------------------- Strings -------------------------------------------
219 if( first == '"' ) {
220 bool lastWasSlash= false;
221 scanner.ConsumeChar<NC>();
222 character next;
223 while( (next= scanner.ConsumeChar()) != '\0' ) {
224 if( next == '\\' ) { lastWasSlash= true; continue; }
225 if( next == '\"' && !lastWasSlash ) break;
226 lastWasSlash= false;
227 }
228
229 if( next != '"' ) {
230 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation,
231 EXPRESSIONS.GetResource("EE4") );
232 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo,
233 expression, expression.Length() - scanner.Length() );
234 throw e;
235 }
236
237 String quoted( expression.Buffer() + tokPosition + 1,
238 expression.Length() - scanner.Length() - tokPosition -2 );
239 token = Tokens::LitString;
240 tokString.Allocate(compileTimeAllocator, String1K(quoted) << Escape( lang::Switch::Off ) );
241 return;
242 }
243
244 //--------------------------------------- unrecognized token -------------------------------------
245 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxError );
246 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, expression.Length() - scanner.Length() );
247 throw e;
248}
249
250
251
252//##################################################################################################
253// Parser
254//##################################################################################################
255#define Start parseConditional
256
257detail::AST* ParserImpl::Parse( const String& exprString, NumberFormat* nf ) {
258 if( exprString.IsEmpty() )
259 throw Exception( ALIB_CALLER, Exceptions::EmptyExpressionString );
260
261 expression = exprString;
262 numberFormat= nf;
263 ASTs = compileTimeAllocator().New<StdVectorMA<AST*>>( compileTimeAllocator );
264 ASTs->reserve(20);
265
266 // load first token
267 scanner= expression;
268 NextToken();
269
270//ALIB_DBG( lexer.DbgListTokens(); )
271
272 AST* ast= Start();
273
274
275 // if tokens remain, an "operator" would be expected
276 if( token != Tokens::EOT ) {
277 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE5") );
278 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
279 throw e;
280 }
281
282 return ast;
283}
284
285
286AST* ParserImpl::parseConditional() {
287 // parse lhs as simple
288 push( parseBinary() ); // Q
289
290 integer qmPosition= tokPosition;
291
292
293 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") ) {
294 NextToken();
295 push( Start() ); // T
296
297 // expect colon
298 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") ) {
299 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE6") );
300 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
301 throw e;
302 }
303 integer colonPosition= tokPosition;
304
305 NextToken();
306
307 AST* F= Start();
308 AST* T= pop();
309 AST* Q= pop();
310 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
311 }
312
313 // was no conditional
314 return pop();
315}
316
317AST* ParserImpl::parseBinary() {
318 // parse lhs as simple
319 push( parseSimple() );
320
321 // parse
322 integer position= tokPosition;
323 String binOp;
324 for( ;; ) {
325 binOp= getBinaryOp();
326 if( binOp.IsNull() )
327 return pop();
328
329 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
330 if( token == Tokens::BraceOpen ) {
331 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
332 position= tokPosition;
333 continue;
334 }
335 break;
336 }
337
338 // check if tokens remain
339 if( token == Tokens::EOT ) {
340 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE7") );
341 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
342 throw e;
343 }
344
345 AST* lhs= top();
346 AST* rhs= push( parseBinary() );
347
348 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
349 AST* replace = rhs;
350 ASTBinaryOp* parent = nullptr;
351 while( replace->NodeType == AST::Types::BinaryOp
352 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
353 {
354 parent = dynamic_cast<ASTBinaryOp*>(replace);
355 replace= parent->Lhs;
356 }
357
358 pop();
359 pop();
360 if( parent == nullptr )
361 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
362
363 // insert binary at lhs of deepest equal-level binary found.
364 // Its current lhs becomes its new lhs-child's rhs.
365 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
366 return rhs;
367}
368
370 // '(' expr ')' (brackets)
371 if( token == Tokens::BraceOpen ) {
372 NextToken();
373 push( Start() );
374
375 if( token != Tokens::BraceClose ) {
376 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE1"));
377 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
378 throw e;
379 }
380 NextToken();
381 replace( parseSubscript( top() ) );
382 return pop();
383 }
384
385 // unary operator
386 integer position= tokPosition;
387 {
388 String unOp= getUnaryOp();
389 if( unOp.IsNotNull() ) {
390 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
391 replace( parseSubscript( top() ) );
392 return pop();
393 } }
394
395 // terminals
396 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
397 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) ); return pop(); }
398 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) ); return pop(); }
399
400 // allow bin op's names here! This is tricky but right!
401 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) {
402 String name= tokString;
403 NextToken();
404
405 // function
406 if( token == Tokens::BraceOpen ) {
407 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
408 push( astFunction );
409 for(;;) {
410 NextToken();
411 if( token == Tokens::BraceClose ) {
412 NextToken();
413 return pop();
414 }
415 astFunction->Arguments.emplace_back( Start() );
416
417 if( token == Tokens::Comma )
418 continue;
419
420 if( token != Tokens::BraceClose ) {
422 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
423 throw e;
424 }
425
426 NextToken();
427 replace( parseSubscript( astFunction ) );
428 return pop();
429 } }
430
431 // identifier
432 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>( String(compileTimeAllocator, name), position ) ) ) );
433 return pop();
434 }
435
436 //--------------------------------------------- ERRORS -------------------------------------------
437 if( token == Tokens::EOT ) {
439 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
440 throw e;
441 }
442
443 if( token == Tokens::BraceClose ) {
445 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
446 throw e;
447 }
448
449 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose ) {
451 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
452 throw e;
453 }
454
455 if( token == Tokens::Comma ) {
457 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
458 throw e;
459 }
460
461 ALIB_ERROR( "EXPR", "Internal error. This should never happen." )
462 return nullptr;
463}
464
466 if( !HasBits( compiler.CfgCompilation, Compilation::AllowSubscriptOperator )
468 return function;
469
470 integer position= tokPosition;
471
472 NextToken();
473
474 push( Start() );
475
479 throw e;
480 }
481
482 // success
483 NextToken();
484 return compileTimeAllocator().New<ASTBinaryOp>( A_CHAR("[]"), function, pop(), position );
485}
486
487
488//##################################################################################################
489// Helpers
490//##################################################################################################
491
492
494 if( token == Tokens::SymbolicOp ) {
495 // symbolic unary ops may be nested. Hence, we find one by one from the actual token and consume the
496 // token only if all is consumed.
497 for( integer partialRead= 1 ; partialRead <= tokString.Length() ; ++partialRead ) {
498 Substring key= Substring( tokString.Buffer(), partialRead );
499 if( unaryOperators.Contains( key ) ) {
500 if( partialRead == tokString.Length() )
501 NextToken();
502 else {
503 tokString= String( tokString.Buffer() + partialRead,
504 tokString.Length() - partialRead );
505 tokPosition+= partialRead;
506 }
507 return key;
508 } }
511 throw e;
512 }
513 else if ( token == Tokens::AlphaUnOp ) {
514 String alphabeticOperator= tokString;
515 NextToken();
516 return alphabeticOperator;
517 }
518
519 return NULL_STRING;
520}
521
523 if ( token == Tokens::SymbolicOp ) {
524 // ignore ternary
525 if ( tokString == A_CHAR( "?" ) || tokString == A_CHAR( ":" ) )
526 return NULL_STRING;
527
528 // binary ops may be longer and concatenated with unaries. So we consume as much as possible
529 // but are happy with less than available
530 for ( integer partialRead = tokString.Length(); partialRead > 0; --partialRead ) {
531 Substring key = Substring( tokString.Buffer(), partialRead );
532 if ( binaryOperators.Contains( key ) ) {
533 if ( partialRead == tokString.Length() )
534 NextToken();
535 else {
536 tokString = String( tokString.Buffer() + partialRead,
537 tokString.Length() - partialRead );
538 tokPosition += partialRead;
539 }
540 return key;
541 } }
542
545 throw e;
546 }
547 else if ( token == Tokens::AlphaBinOp ) {
548 String alphabeticOperator= tokString;
549 NextToken();
550 return alphabeticOperator;
551 }
552
553 return NULL_STRING;
554}
555
556
557#undef Start
558
559}}} // namespace [alib::expressions::detail]
#define ALIB_CALLER_NULLED
#define A_CHAR(STR)
#define ALIB_ERROR(domain,...)
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
Tokens token
The actual token type.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
String expression
The given expression to parse.
integer tokPosition
The position of the token in field expression.
String tokString
String value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
@ SubscriptClose
A closing subscript brace.
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
const String & GetResource(const NString &name)
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
Definition alox.cpp:14
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
constexpr String NULL_STRING
A nulled string of the default character type.
Definition string.hpp:2247
lang::integer integer
Type alias in namespace #"%alib".
Definition integers.hpp:149
strings::TString< character > String
Type alias in namespace #"%alib".
Definition string.hpp:2165
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of ALib Camp class ExpressionsCamp.
strings::TSubstring< character > Substring
Type alias in namespace #"%alib".
exceptions::Exception Exception
Type alias in namespace #"%alib".
Abstract syntax tree node representing binary operators.
Definition ast_impl.hpp:190