8#if !defined(ALIB_C20_MODULES) || ((ALIB_C20_MODULES != 0) && (ALIB_C20_MODULES != 1))
9# error "Symbol ALIB_C20_MODULES has to be given to the compiler as either 0 or 1"
19 module ALib.Expressions.Impl;
20 import ALib.Expressions;
44 for(
auto& op :
compiler.UnaryOperators ) {
45 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op),
"EXPR",
46 "Doubly defined unary operator symbol '{}'.", op )
48 unaryOperators.EmplaceUnique(op);
50 operatorChars[it]= true;
53 for(
auto& op :
compiler.AlphabeticUnaryOperatorAliases ) {
54 ALIB_ASSERT_ERROR( !unaryOperators.Contains(op.first),
"EXPR",
55 "Doubly defined unary operator symbol '{}'.", op.first )
57 unaryOperators.EmplaceUnique(op.first);
58 if( !isalpha( op.first.CharAtStart() ) )
59 for( auto it : op.first )
60 operatorChars[it]= true;
64 for(
auto& op :
compiler.BinaryOperators ) {
65 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first),
"EXPR",
66 "Doubly defined binary operator symbol '{}'.", op.first )
67 if( op.first == A_CHAR(
"[]") ) {
68 syntaxTokens[u8'[
']= true;
69 syntaxTokens[u8']
']= true;
71 binaryOperators.EmplaceUnique(op.first);
72 for( auto it : op.first )
73 operatorChars[it]= true;
76 for( auto& op : compiler.AlphabeticBinaryOperatorAliases ) {
77 ALIB_ASSERT_ERROR( !binaryOperators.Contains(op.first), "EXPR",
78 "Doubly defined binary operator symbol '{}
'.", op.first )
80 ALIB_DBG( auto originalOp= )
81 compiler.BinaryOperators.Find( op.second );
82 ALIB_ASSERT_ERROR( originalOp != compiler.BinaryOperators.end(), "EXPR",
83 "Alias '{}
' defined for unknown operator '{}
'.",
86 binaryOperators.EmplaceUnique(op.first);
87 if( !isalpha( op.first.CharAtStart() ) )
88 for( auto it : op.first )
89 operatorChars[it]= true;
92//##################################################################################################
94//##################################################################################################
// Lexer step: classifies the text at the start of `scanner` and stores the result in `token`
// (plus `tokString`, `tokLiteralHint` and `tokPosition` where applicable).
// The visible sections are tested in this order: syntax tokens, symbolic operators, alphabetic
// operators, identifiers, numbers, string literals; the final section builds a SyntaxError exception.
// NOTE(review): this listing omits many lines (closing braces, returns, throws, some declarations),
// so the comments below describe only what the visible statements establish.
95void ParserImpl::NextToken() {
// Offset of the token within `expression`: total length minus what the scanner still holds.
97 tokPosition= expression.Length() - scanner.Length();
// NOTE(review): the body of this branch is not visible; presumably it reports Tokens::EOT - confirm.
99 if( scanner.IsEmpty() ) {
104 character first= scanner.CharAtStart<NC>();
106 //----------------------------------------- Syntax Tokens ----------------------------------------
// Characters flagged in `syntaxTokens` map directly to a Tokens value of the same numeric code.
107 if( syntaxTokens[first] ) {
108 token= Tokens(first);
109 scanner.ConsumeChar();
113 //--------------------------------------- Symbolic operators -------------------------------------
114 // read up to 3 operator characters
115 if( operatorChars[first] ) {
// NOTE(review): the increments of operatorLength for the 2nd and 3rd character are on omitted lines.
116 integer operatorLength= 1;
117 scanner.ConsumeChar();
118 if( operatorChars[scanner.CharAtStart() ] ) {
119 scanner.ConsumeChar();
122 if( operatorChars[scanner.CharAtStart() ] ) {
123 scanner.ConsumeChar();
127 token= Tokens::SymbolicOp;
128 tokString= String( expression.Buffer() + tokPosition, operatorLength );
130 // special treatment for Elvis with spaces "? :"
131 if( tokString == A_CHAR("?") && compiler.BinaryOperators.Contains( A_CHAR("?:") ) ) {
132 // patch existing token and return
// `backup` keeps the scanner state from before TrimStart(); presumably it is restored on an
// omitted line when no colon follows - confirm.
133 Substring backup= scanner;
134 if( scanner.TrimStart().CharAtStart() == ':
' ) {
135 tokString= A_CHAR("?:");
136 scanner.ConsumeChar();
144 //-------------------------------------- alphabetic operators ------------------------------------
145 if( isalpha( first ) ) {
147 while( len < scanner.Length() && ( isalpha( scanner[len] ) || scanner[len] == '_
' ) )
149 tokString= scanner.Substring<NC>( 0, len );
// The hash is computed ignoring case. A hit in the operator set is accepted only if the
// AlphabeticOperatorsIgnoreCase flag is set or tokString.Equals<NC> confirms the stored value
// (presumably a case-sensitive comparison - confirm).
150 auto hashCode= tokString.HashcodeIgnoreCase();
154 decltype(unaryOperators)::Iterator it;
155 if( (it= unaryOperators .Find( tokString, hashCode )) != unaryOperators.end()
156 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
157 || tokString.Equals<NC>( it.Value() ) ) )
159 scanner.ConsumeChars<NC>( tokString.Length() );
160 token= Tokens::AlphaUnOp;
// Same lookup against the binary operator set.
166 decltype(binaryOperators)::Iterator it;
167 if( (it= binaryOperators .Find( tokString, hashCode )) != binaryOperators.end()
168 && ( HasBits(compiler.CfgCompilation, Compilation::AlphabeticOperatorsIgnoreCase)
169 || tokString.Equals<NC>( it.Value() ) ) )
171 scanner.ConsumeChars<NC>( tokString.Length() );
172 token= Tokens::AlphaBinOp;
178 //------------------------------------------ Identifiers -----------------------------------------
179 if( isalpha( first ) || first == '_
' ) {
180 integer endOfIdent= 0;
// NOTE(review): the remainder of this loop condition is on an omitted line.
182 while( ++endOfIdent < scanner.Length()
183 && ( isalnum( next= scanner[endOfIdent] )
186 token= Tokens::Identifier;
187 tokString= String( expression.Buffer() + tokPosition, endOfIdent );
188 scanner.ConsumeChars<NC>( endOfIdent );
192 //-------------------------------------------- numbers -------------------------------------------
193 if( isdigit( first ) ) {
// Skip the integral digits (and group characters when ReadGroupChars is set) to find where the
// integral part ends.
194 integer endOfDecPart= 0;
196 while( ++endOfDecPart < scanner.Length()
197 && ( isdigit( next= scanner[endOfDecPart] )
198 || ( HasBits(numberFormat->Flags, NumberFormatFlags::ReadGroupChars) && next== numberFormat->ThousandsGroupChar ) )
// A decimal point or an exponent separator after the integral part selects the float path.
203 if( next == numberFormat->DecimalPointChar
206 || scanner.Substring( endOfDecPart ).StartsWith( numberFormat->ExponentSeparator ) )
209 auto oldStart= scanner.Buffer();
211 scanner.ConsumeFloat( value, numberFormat );
212 token = Tokens::LitFloat;
// The Scientific hint is chosen when the consumed text contains the letter e or E or the
// configured exponent separator at an index greater than zero.
215 String numberParsed( oldStart, scanner.Buffer() - oldStart );
216 tokLiteralHint= numberParsed.IndexOf('e
') > 0
217 || numberParsed.IndexOf('E
') > 0
218 || numberParsed.IndexOf( numberFormat->ExponentSeparator ) > 0
219 ? ASTLiteral::NFHint::Scientific
220 : ASTLiteral::NFHint::NONE;
// Integer path: the hint follows a non-empty hex, octal or binary prefix found at the scanner start.
225 tokLiteralHint= ASTLiteral::NFHint::NONE;
226 if( numberFormat->HexLiteralPrefix.IsNotEmpty()
227 && scanner.StartsWith( numberFormat->HexLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Hexadecimal;
228 else if( numberFormat->OctLiteralPrefix.IsNotEmpty()
229 && scanner.StartsWith( numberFormat->OctLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Octal;
230 else if( numberFormat->BinLiteralPrefix.IsNotEmpty()
231 && scanner.StartsWith( numberFormat->BinLiteralPrefix ) ) tokLiteralHint= ASTLiteral::NFHint::Binary;
234 scanner.ConsumeInt( value, numberFormat );
235 token= Tokens::LitInteger;
242 //-------------------------------------------- Strings -------------------------------------------
// NOTE(review): no visible statement resets lastWasSlash to false. Unless an omitted line does so,
// every quote character after the first backslash would be skipped - confirm against the full source.
244 bool lastWasSlash= false;
245 scanner.ConsumeChar<NC>();
247 while( (next= scanner.ConsumeChar()) != '\0' ) {
248 if( next == '\\' ) { lastWasSlash= true; continue; }
249 if( next == '\"' && !lastWasSlash ) break;
// Exception built with resource EE4; the condition guarding it and the throw are not visible here.
254 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation,
255 EXPRESSIONS.GetResource("EE4") );
256 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo,
257 expression, expression.Length() - scanner.Length() );
// `quoted` spans the consumed text without its first and last character (hence +1 and -2).
261 String quoted( expression.Buffer() + tokPosition + 1,
262 expression.Length() - scanner.Length() - tokPosition -2 );
263 token = Tokens::LitString;
// Stores the text in memory of compileTimeAllocator after applying Escape with Switch::Off
// (presumably this converts escape sequences back to plain characters - confirm).
264 tokString.Allocate(compileTimeAllocator, String1K(quoted) << Escape( lang::Switch::Off ) );
268 //--------------------------------------- unrecognized token -------------------------------------
// Fallback: a SyntaxError exception annotated with the current scan position.
269 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxError );
270 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, expression.Length() - scanner.Length() );
276//##################################################################################################
278//##################################################################################################
// Alias for the entry rule used by the parse methods: each `Start()` call expands to
// `parseConditional()`.
279#define Start parseConditional
// Parses the expression string `exprString`.
// Visible steps: throws Exceptions::EmptyExpressionString for an empty input, stores the string in
// `expression`, creates the AST pointer vector `ASTs` with the compile-time allocator and, if the
// current token is not Tokens::EOT, builds a SyntaxErrorExpectation exception (resource EE5)
// annotated with the token position.
// NOTE(review): several lines are missing from this listing (scanner setup, the use of `nf`, the
// call into the grammar, the throw and the return statement) - confirm against the full source.
281detail::AST* ParserImpl::Parse( const String& exprString, NumberFormat* nf ) {
282 if( exprString.IsEmpty() )
283 throw Exception( ALIB_CALLER, Exceptions::EmptyExpressionString );
285 expression = exprString;
287 ASTs = compileTimeAllocator().New<StdVectorMA<AST*>>( compileTimeAllocator );
294//ALIB_DBG( lexer.DbgListTokens(); )
299 // if tokens remain, an "operator" would be expected
300 if( token != Tokens::EOT ) {
301 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE5") );
302 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
// Parses a conditional of the form Q ? T : F.
// Q is parsed with parseBinary(). If the next token is the symbolic operator ?, T is parsed with
// Start() and a symbolic : is required, otherwise a SyntaxErrorExpectation exception (resource EE6)
// is built. The result is an ASTConditional carrying the positions of both operator tokens.
// NOTE(review): the lines that consume the operator tokens, parse F, define Q, T and F, and return
// in the non-conditional case are missing from this listing.
310AST* ParserImpl::parseConditional() {
311 // parse the condition (lhs) via parseBinary()
312 push( parseBinary() ); // Q
// Remember where a possible ? token sits before inspecting it.
314 integer qmPosition= tokPosition;
317 if( token == Tokens::SymbolicOp && tokString == A_CHAR("?") ) {
319 push( Start() ); // T
322 if( token != Tokens::SymbolicOp || tokString != A_CHAR(":") ) {
323 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE6") );
324 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
327 integer colonPosition= tokPosition;
334 return compileTimeAllocator().New<ASTConditional>(Q, T, F, qmPosition, colonPosition );
337 // was no conditional
// Parses a chain of binary operations and arranges the resulting nodes by operator precedence.
// The left operand comes from parseSimple() and the operator from getBinaryOp(). A right operand
// that starts with an opening brace is folded into the left-hand side at once. Otherwise the rest
// is parsed recursively and the new operator node is inserted into the left spine of that result.
// NOTE(review): the declarations of `binOp`, `lhs` and the local `replace`, the loop around the
// braced case and the final return are on lines missing from this listing.
341AST* ParserImpl::parseBinary() {
342 // parse lhs as simple
343 push( parseSimple() );
346 integer position= tokPosition;
349 binOp= getBinaryOp();
353 // rhs is braced? -> lhs becomes <lhs op rhs> and we start over
354 if( token == Tokens::BraceOpen ) {
355 replace( compileTimeAllocator().New<ASTBinaryOp>(binOp, top(), parseSimple(), position ) );
356 position= tokPosition;
362 // check if tokens remain
363 if( token == Tokens::EOT ) {
364 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource("EE7") );
365 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
// The right-hand side is parsed recursively, so it may itself be a tree of binary operators.
370 AST* rhs= push( parseBinary() );
372 int binOpPrecedence= compiler.GetBinaryOperatorPrecedence( binOp );
// Walk down the Lhs links while the node is a binary operator whose precedence is less than or
// equal to that of binOp; `parent` ends up as the deepest such node.
// NOTE(review): here `replace` is a local AST pointer (presumably initialized with rhs on an
// omitted line), not the replace() helper called above - confirm.
374 ASTBinaryOp* parent = nullptr;
375 while( replace->NodeType == AST::Types::BinaryOp
376 && compiler.GetBinaryOperatorPrecedence(dynamic_cast<ASTBinaryOp*>(replace)->Operator) <= binOpPrecedence )
378 parent = dynamic_cast<ASTBinaryOp*>(replace);
379 replace= parent->Lhs;
// No such node found: the new operator simply becomes the root over lhs and rhs.
384 if( parent == nullptr )
385 return compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, rhs, position );
387 // insert binary at lhs of deepest equal-level binary found.
388 // Its current lhs becomes its new lhs-child's rhs.
389 parent->Lhs= compileTimeAllocator().New<ASTBinaryOp>( binOp, lhs, parent->Lhs, position );
395 if( token == Tokens::BraceOpen ) {
399 if( token != Tokens::BraceClose ) {
400 Exception e( ALIB_CALLER_NULLED, Exceptions::SyntaxErrorExpectation, EXPRESSIONS.GetResource(
"EE1"));
401 e.Add ( ALIB_CALLER_NULLED, Exceptions::ExpressionInfo, expression, tokPosition );
405 replace( parseSubscript( top() ) );
412 String unOp= getUnaryOp();
413 if( unOp.IsNotNull() ) {
414 push( compileTimeAllocator().New<ASTUnaryOp>(unOp, parseSimple(), position ) );
415 replace( parseSubscript( top() ) );
420 if( token == Tokens::LitInteger ) { push(compileTimeAllocator().New<ASTLiteral>(tokInteger, position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) );
return pop(); }
421 if( token == Tokens::LitFloat ) { push(compileTimeAllocator().New<ASTLiteral>(tokFloat , position, tokLiteralHint ) ); NextToken(); replace( parseSubscript(top()) );
return pop(); }
422 if( token == Tokens::LitString ) { push(compileTimeAllocator().New<ASTLiteral>(
String(compileTimeAllocator, tokString), position )); NextToken(); replace( parseSubscript(top()) );
return pop(); }
425 if( token == Tokens::Identifier || token == Tokens::AlphaBinOp ) {
430 if( token == Tokens::BraceOpen ) {
431 ASTFunction* astFunction= compileTimeAllocator().New<ASTFunction>( name, position, compileTimeAllocator );
435 if( token == Tokens::BraceClose ) {
439 astFunction->Arguments.emplace_back( Start() );
441 if( token == Tokens::Comma )
444 if( token != Tokens::BraceClose ) {
451 replace( parseSubscript( astFunction ) );
456 replace( parseSubscript( push(compileTimeAllocator().New<ASTIdentifier>(
String(compileTimeAllocator, name), position ) ) ) );
461 if( token == Tokens::EOT ) {
467 if( token == Tokens::BraceClose ) {
473 if( token == Tokens::SubscriptOpen || token == Tokens::SubscriptClose ) {
479 if( token == Tokens::Comma ) {
485 ALIB_ERROR(
"EXPR",
"Internal error. This should never happen." )
521 for(
integer partialRead= 1 ; partialRead <=
tokString.Length() ; ++partialRead ) {
540 return alphabeticOperator;
554 for (
integer partialRead =
tokString.Length(); partialRead > 0; --partialRead ) {
574 return alphabeticOperator;
const String & GetResource(const NString &name)
Exception & Add(const lang::CallerInfo &ci, TEnum type, TArgs &&... args)
Tokens token
The actual token type.
ALIB_DLL String getUnaryOp()
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > unaryOperators
Compiler & compiler
The compiler that this parser works for.
AST * parseSubscript(AST *function)
ALIB_DLL String getBinaryOp()
BitSet< 256 > syntaxTokens
String expression
The given expression to parse.
integer tokPosition
The position of the token in expression.
String tokString
String value of token (if applicable).
ParserImpl(Compiler &compiler, MonoAllocator &allocator)
BitSet< 256 > operatorChars
@ SubscriptClose
A closing subscript brace.
@ AlphaBinOp
An alphabetic binary operator.
@ SymbolicOp
A symbolic operator. Can be unary or binary.
@ AlphaUnOp
An alphabetic unary operator.
@ SubscriptOpen
An opening subscript brace.
HashSet< MonoAllocator, String, alib::hash_string_ignore_case< character >, alib::equal_to_string_ignore_case< character > > binaryOperators
void NextToken()
This is the "scanner" or "lexer" method.
MonoAllocator & compileTimeAllocator
#define ALIB_CALLER_NULLED
#define ALIB_ERROR(domain,...)
@ UnknownBinaryOperatorSymbol
Unknown binary operator symbol found when parsing expression string.
@ UnknownUnaryOperatorSymbol
Unknown unary operator symbol found when parsing expression string.
@ SyntaxErrorExpectation
Syntax error with concrete information about what the parser expected at given position.
constexpr String NULL_STRING
A nulled string of the default character type.
lang::integer integer
Type alias in namespace alib.
monomem::TMonoAllocator< lang::HeapAllocator > MonoAllocator
exceptions::Exception Exception
Type alias in namespace alib.
strings::TString< character > String
Type alias in namespace alib.
expressions::ExpressionsCamp EXPRESSIONS
The singleton instance of ALib Camp class ExpressionsCamp.
strings::TSubstring< character > Substring
Type alias in namespace alib.
Abstract syntax tree node representing binary operators.