From 031419b0cf76f16acd07697c25c8d35e92866ea2 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 22 Jun 2023 15:04:47 +0200 Subject: [PATCH 1/5] Add an error message for empty literal/regex declarations, also fix to accept "'\''" literal. --- src/lalr/ErrorCode.hpp | 1 + src/lalr/GrammarParser.cpp | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/lalr/ErrorCode.hpp b/src/lalr/ErrorCode.hpp index 4ed44c3..0fcab63 100644 --- a/src/lalr/ErrorCode.hpp +++ b/src/lalr/ErrorCode.hpp @@ -12,6 +12,7 @@ enum ErrorCode PARSER_ERROR_NONE, ///< No %error. LALR_ERROR_SYNTAX, ///< Syntax %error occured while parsing input. LALR_ERROR_UNTERMINATED_LITERAL, ///< Unterminated literal in an lalr grammar. + LALR_ERROR_EMPTY_LITERAL, ///< Empty literal in an lalr grammar. LEXER_ERROR_MISSING_ACTION_HANDLER, ///< A lexer action hasn't been bound to a function. LEXER_ERROR_SYNTAX, ///< Syntax %error occured while parsing some input. LEXER_ERROR_SYMBOL_CONFLICT, ///< A lexer state matches more than one symbol. diff --git a/src/lalr/GrammarParser.cpp b/src/lalr/GrammarParser.cpp index 7c369f3..6564190 100644 --- a/src/lalr/GrammarParser.cpp +++ b/src/lalr/GrammarParser.cpp @@ -228,12 +228,21 @@ bool GrammarParser::match_literal() { escaped = *position == '\\'; ++position; + if ( *position == '\\' && escaped ) + { + ++position; + escaped = false; + } } if ( position == end_ || !is_new_line(position) ) { lexeme_.assign( position_, position ); position_ = position; expect( "'" ); + if ( lexeme_.empty() ) + { + error( LALR_ERROR_EMPTY_LITERAL, "empty literal" ); + } return true; } error( LALR_ERROR_UNTERMINATED_LITERAL, "unterminated literal" ); @@ -253,10 +262,19 @@ bool GrammarParser::match_regex() { escaped = *position == '\\'; ++position; + if (*position == '\\' && escaped) + { + ++position; + escaped = false; + } } lexeme_.assign( position_, position ); position_ = position; expect( "\"" ); + if ( lexeme_.empty() ) + { + error( LALR_ERROR_EMPTY_LITERAL, "empty regex" ); + } return true; } return false; From b2224991c9f22ecc0ddde6afa5fba71ce0bac6c6 Mon Sep 17 00:00:00 2001 From: Charles Baker Date: Sun, 16 Jul 2023 08:12:50 +1200 Subject: [PATCH 2/5] Advance position when encountering an unterminated literal --- src/lalr/GrammarParser.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lalr/GrammarParser.cpp b/src/lalr/GrammarParser.cpp index 6564190..9736278 100644 --- a/src/lalr/GrammarParser.cpp +++ b/src/lalr/GrammarParser.cpp @@ -246,6 +246,7 @@ bool GrammarParser::match_literal() return true; } error( LALR_ERROR_UNTERMINATED_LITERAL, "unterminated literal" ); + position_ = position; return false; } return false; From 638de559ca9d8e808ad4ad5ee84d6993217c12f9 Mon Sep 17 00:00:00 2001 From: Charles Baker Date: Sun, 16 Jul 2023 15:03:35 +1200 Subject: [PATCH 3/5] Report errors for unterminated regular expressions --- src/lalr/GrammarParser.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/lalr/GrammarParser.cpp b/src/lalr/GrammarParser.cpp index 9736278..80861a0 100644 --- a/src/lalr/GrammarParser.cpp +++ b/src/lalr/GrammarParser.cpp @@ -234,6 +234,7 @@ bool GrammarParser::match_literal() escaped = false; } } + if ( position == end_ || !is_new_line(position) ) { lexeme_.assign( position_, position ); @@ -245,6 +246,7 @@ bool GrammarParser::match_literal() } return true; } + error( LALR_ERROR_UNTERMINATED_LITERAL, "unterminated literal" ); position_ = position; return false; @@ -259,24 +261,32 @@ bool GrammarParser::match_regex() { bool escaped = false; const char* position = position_; - while ( position != end_ && (*position != '"' || escaped) ) + while ( position != end_ && (*position != '"' || escaped) && !is_new_line(position) ) { escaped = *position == '\\'; ++position; - if (*position == '\\' && escaped) + if ( *position == '\\' && escaped ) { ++position; escaped = false; } } - lexeme_.assign( position_, position ); - position_ = position; - expect( "\"" ); - if ( lexeme_.empty() ) + + if ( position == end_ || !is_new_line(position) ) { - error( LALR_ERROR_EMPTY_LITERAL, "empty regex" ); + lexeme_.assign( position_, position ); + position_ = position; + expect( "\"" ); + if ( lexeme_.empty() ) + { + error( LALR_ERROR_EMPTY_LITERAL, "empty regex" ); + } + return true; } - return true; + + error( LALR_ERROR_UNTERMINATED_LITERAL, "unterminated regex" ); + position_ = position; + return false; } return false; } From 919b0fe36b70622665249cfef45ae30069e0d1c3 Mon Sep 17 00:00:00 2001 From: Charles Baker Date: Sun, 16 Jul 2023 15:04:13 +1200 Subject: [PATCH 4/5] Improve parser tests that check for errors --- src/lalr/lalr_test/TestParsers.cpp | 59 +++++++++++++----------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/src/lalr/lalr_test/TestParsers.cpp b/src/lalr/lalr_test/TestParsers.cpp index e65cfaf..d40e9f1 100644 --- a/src/lalr/lalr_test/TestParsers.cpp +++ b/src/lalr/lalr_test/TestParsers.cpp @@ -64,44 +64,35 @@ SUITE( Parsers ) } }; - struct CheckParserErrorPolicy : public ErrorPolicy + struct CheckErrorPolicy : public ErrorPolicy { - int expected_error; - int errors; + std::vector expected_errors_; + int errors; - CheckParserErrorPolicy( int expected_error ) - : expected_error( expected_error ) + CheckErrorPolicy( int expected_error ) + : expected_errors_() , errors( 0 ) { + expected_errors_.push_back( expected_error ); } - void lalr_error( int /*line*/, int /*column*/, int error, const char* /*format*/, va_list /*args*/ ) - { - (void) error; - ++errors; - CHECK( error == expected_error ); - } - }; - - struct CheckLexerErrorPolicy : public ErrorPolicy - { - int expected_error; - int errors; - - CheckLexerErrorPolicy( int expected_error ) - : expected_error( expected_error ) + CheckErrorPolicy( std::initializer_list expected_errors ) + : expected_errors_() , errors( 0 ) { + expected_errors_.insert( expected_errors_.end(), expected_errors.begin(), expected_errors.end() ); } void lalr_error( int /*line*/, int /*column*/, int error, const char* /*format*/, va_list /*args*/ ) { - (void) error; + if ( errors < int(expected_errors_.size()) ) + { + CHECK_EQUAL( expected_errors_[errors], error ); + } ++errors; - CHECK( error == expected_error ); } }; - + TEST( OrOperator ) { const char* or_grammar = @@ -813,7 +804,7 @@ SUITE( Parsers ) "}" ; - CheckParserErrorPolicy error_policy( PARSER_ERROR_SYNTAX ); + CheckErrorPolicy error_policy( PARSER_ERROR_SYNTAX ); ParserStateMachine parser_state_machine( missing_open_brace, missing_open_brace + strlen(missing_open_brace), &error_policy ); CHECK( error_policy.errors == 1 ); CHECK( parser_state_machine.start_state() == NULL ); @@ -830,7 +821,7 @@ SUITE( Parsers ) "}" ; - CheckParserErrorPolicy error_policy( PARSER_ERROR_SYNTAX ); + CheckErrorPolicy error_policy( PARSER_ERROR_SYNTAX ); ParserStateMachine parser_state_machine( missing_close_quotes, missing_close_quotes + strlen(missing_close_quotes), &error_policy ); CHECK( error_policy.errors == 2 ); CHECK( parser_state_machine.start_state() == NULL ); @@ -848,7 +839,7 @@ SUITE( Parsers ) "}" ; - CheckLexerErrorPolicy error_policy( LEXER_ERROR_SYNTAX ); + CheckErrorPolicy error_policy( LEXER_ERROR_SYNTAX ); GrammarCompiler compiler; compiler.compile( syntax_errors_in_regular_expressions_grammar, @@ -867,7 +858,7 @@ SUITE( Parsers ) "}" ; - CheckParserErrorPolicy error_policy( PARSER_ERROR_UNDEFINED_SYMBOL ); + CheckErrorPolicy error_policy( PARSER_ERROR_UNDEFINED_SYMBOL ); GrammarCompiler compiler; compiler.compile( undefined_symbol_grammar, undefined_symbol_grammar + strlen(undefined_symbol_grammar), &error_policy ); CHECK( error_policy.errors == 1 ); @@ -882,7 +873,7 @@ SUITE( Parsers ) "}" ; - CheckParserErrorPolicy error_policy( PARSER_ERROR_UNREFERENCED_SYMBOL ); + CheckErrorPolicy error_policy( PARSER_ERROR_UNREFERENCED_SYMBOL ); GrammarCompiler compiler; compiler.compile( unreferenced_symbol_error_grammar, @@ -913,7 +904,7 @@ SUITE( Parsers ) ; GrammarCompiler compiler; - CheckParserErrorPolicy error_policy( PARSER_ERROR_NONE ); + CheckErrorPolicy error_policy( PARSER_ERROR_NONE ); compiler.compile( precedence_directive_symbols_grammar, precedence_directive_symbols_grammar + strlen(precedence_directive_symbols_grammar), &error_policy ); CHECK( error_policy.errors == 0 ); } @@ -929,7 +920,7 @@ SUITE( Parsers ) " prototype: \"[A-Za-z_][A-Za-z_0-9]*\"; value: \"[A-Za-z_0-9\\./@:-]+\";\n" "}" ; - CheckLexerErrorPolicy error_policy( LEXER_ERROR_SYMBOL_CONFLICT ); + CheckErrorPolicy error_policy( LEXER_ERROR_SYMBOL_CONFLICT ); GrammarCompiler compiler; compiler.compile( lexer_conflict_grammar, lexer_conflict_grammar + strlen(lexer_conflict_grammar), &error_policy ); CHECK( error_policy.errors == 1 ); @@ -971,7 +962,7 @@ SUITE( Parsers ) ; GrammarCompiler compiler; - CheckParserErrorPolicy error_policy( PARSER_ERROR_NONE ); + CheckErrorPolicy error_policy( PARSER_ERROR_NONE ); compiler.compile( lexer_conflict_grammar, lexer_conflict_grammar + strlen(lexer_conflict_grammar), &error_policy ); CHECK( error_policy.errors == 0 ); @@ -1003,7 +994,7 @@ SUITE( Parsers ) "}" ; - CheckParserErrorPolicy error_policy( PARSER_ERROR_PARSE_TABLE_CONFLICT ); + CheckErrorPolicy error_policy( PARSER_ERROR_PARSE_TABLE_CONFLICT ); GrammarCompiler compiler; compiler.compile( associativity_grammar, associativity_grammar + strlen(associativity_grammar), &error_policy ); CHECK( error_policy.errors == 0 ); @@ -1045,7 +1036,7 @@ SUITE( Parsers ) ; GrammarCompiler compiler; - CheckParserErrorPolicy error_policy( PARSER_ERROR_PARSE_TABLE_CONFLICT ); + CheckErrorPolicy error_policy( PARSER_ERROR_PARSE_TABLE_CONFLICT ); compiler.compile( precedence_grammar, precedence_grammar + strlen(precedence_grammar), &error_policy ); CHECK( error_policy.errors == 0 ); @@ -1071,7 +1062,7 @@ SUITE( Parsers ) ; GrammarCompiler compiler; - CheckParserErrorPolicy error_policy( PARSER_ERROR_ERROR_SYMBOL_ON_LEFT_HAND_SIDE ); + CheckErrorPolicy error_policy( PARSER_ERROR_ERROR_SYMBOL_ON_LEFT_HAND_SIDE ); compiler.compile( grammar, grammar + strlen(grammar), &error_policy ); CHECK( error_policy.errors == 1 ); } From 7b1a3ab372267cae011ac5dea0344702387636df Mon Sep 17 00:00:00 2001 From: Charles Baker Date: Sun, 16 Jul 2023 08:11:36 +1200 Subject: [PATCH 5/5] Test errors reported for empty literals and regular expressions --- src/lalr/lalr_test/TestParsers.cpp | 119 ++++++++++++++++++ .../lalr_test/TestPrecedenceDirectives.cpp | 28 +++-- 2 files changed, 139 insertions(+), 8 deletions(-) diff --git a/src/lalr/lalr_test/TestParsers.cpp b/src/lalr/lalr_test/TestParsers.cpp index d40e9f1..422c8b3 100644 --- a/src/lalr/lalr_test/TestParsers.cpp +++ b/src/lalr/lalr_test/TestParsers.cpp @@ -93,6 +93,26 @@ SUITE( Parsers ) } }; + struct CollectErrorPolicy : public ErrorPolicy + { + std::vector errors; + + CollectErrorPolicy() + : errors{} + { + } + + int error( int index ) const + { + return index >= 0 && index < int(errors.size()) ? errors[index] : PARSER_ERROR_NONE; + } + + void lalr_error( int /*line*/, int /*column*/, int error, const char* /*format*/, va_list /*args*/ ) + { + errors.push_back( error ); + } + }; + TEST( OrOperator ) { const char* or_grammar = @@ -1274,4 +1294,103 @@ SUITE( Parsers ) CHECK( parser.accepted() ); CHECK( parser.full() ); } + + TEST( UnterminatedLiteral ) + { + const char* unterminated_literal = + "UnterminatedLiteral {\n" + " unterminated: 'abc;\n" + "}\n" + ; + + CheckErrorPolicy error_policy{ LALR_ERROR_UNTERMINATED_LITERAL }; + GrammarCompiler compiler; + int errors = compiler.compile( unterminated_literal, unterminated_literal + strlen(unterminated_literal), &error_policy ); + CHECK( errors > 0 ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } + + TEST( UnterminatedRegularExpression ) + { + const char* unterminated_regex = + "UnterminatedRegularExpression {\n" + " unterminated: \"abc;\n" + "}\n" + ; + + CheckErrorPolicy error_policy{ LALR_ERROR_UNTERMINATED_LITERAL }; + GrammarCompiler compiler; + int errors = compiler.compile( unterminated_regex, unterminated_regex + strlen(unterminated_regex), &error_policy ); + CHECK( errors > 0 ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } + + TEST( UnterminatedWhitespace ) + { + const char* unterminate_whitespace = + "UnterminatedWhitespace {\n" + " %whitespace \"abc;\n" + "}\n" + ; + + CheckErrorPolicy error_policy{ LALR_ERROR_UNTERMINATED_LITERAL }; + GrammarCompiler compiler; + int errors = compiler.compile( unterminate_whitespace, unterminate_whitespace + strlen(unterminate_whitespace), &error_policy ); + CHECK( errors > 0 ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } + + TEST( EmptyLiteral ) + { + const char* empty_literal = + "EmptyLiteral {\n" + " empty: '';\n" + "}\n" + ; + + CollectErrorPolicy error_policy; + GrammarCompiler compiler; + int errors = compiler.compile( empty_literal, empty_literal + strlen(empty_literal), &error_policy ); + CHECK( errors > 0 ); + CHECK_EQUAL( LALR_ERROR_EMPTY_LITERAL, error_policy.error(0) ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } + + TEST( EmptyRegularExpression ) + { + const char* empty_regex = + "EmptyRegularExpression {\n" + " empty: \"\";\n" + "}\n" + ; + + CollectErrorPolicy error_policy; + GrammarCompiler compiler; + int errors = compiler.compile( empty_regex, empty_regex + strlen(empty_regex), &error_policy ); + CHECK( errors > 0 ); + CHECK_EQUAL( LALR_ERROR_EMPTY_LITERAL, error_policy.error(0) ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } + + TEST( EmptyWhitespace ) + { + const char* empty_whitespace = + "EmptyWhitespace {\n" + " %whitespace \"\";\n" + "}\n" + ; + + CollectErrorPolicy error_policy; + GrammarCompiler compiler; + int errors = compiler.compile( empty_whitespace, empty_whitespace + strlen(empty_whitespace), &error_policy ); + CHECK( errors > 0 ); + CHECK_EQUAL( LALR_ERROR_EMPTY_LITERAL, error_policy.error(0) ); + Parser parser( compiler.parser_state_machine() ); + CHECK( !parser.valid() ); + } } diff --git a/src/lalr/lalr_test/TestPrecedenceDirectives.cpp b/src/lalr/lalr_test/TestPrecedenceDirectives.cpp index 0abb829..8831598 100644 --- a/src/lalr/lalr_test/TestPrecedenceDirectives.cpp +++ b/src/lalr/lalr_test/TestPrecedenceDirectives.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -17,22 +19,31 @@ SUITE( PrecedenceDirectives ) { struct EventSink : public ErrorPolicy { - int expected_error_; + std::vector expected_errors_; int errors_; EventSink( int expected_error ) - : expected_error_( expected_error ) + : expected_errors_() , errors_( 0 ) { + expected_errors_.push_back( expected_error ); + } + + EventSink( std::initializer_list expected_errors ) + : expected_errors_() + , errors_( 0 ) + { + expected_errors_.insert( expected_errors_.end(), expected_errors.begin(), expected_errors.end() ); } void lalr_error( int /*line*/, int /*column*/, int error, const char* /*format*/, va_list /*args*/ ) { + CHECK( errors_ < int(expected_errors_.size()) ); + if ( errors_ < int(expected_errors_.size()) ) + { + CHECK_EQUAL( expected_errors_[errors_], error ); + } ++errors_; - // char message [1024]; - // vsnprintf( message, sizeof(message), format, args ); - // printf( "%s\n", message ); - CHECK( error == expected_error_ ); } }; @@ -92,8 +103,9 @@ SUITE( PrecedenceDirectives ) "%left 'return' 'break' 'continue' 'if' 'while' 'for' identifier '{'; \n" "} \n" ; - EventSink event_sink( LALR_ERROR_UNTERMINATED_LITERAL ); + EventSink event_sink{ LALR_ERROR_UNTERMINATED_LITERAL, LALR_ERROR_SYNTAX, LALR_ERROR_SYNTAX }; GrammarCompiler compiler; - compiler.compile( grammar, grammar + strlen(grammar), &event_sink ); + int errors = compiler.compile( grammar, grammar + strlen(grammar), &event_sink ); + CHECK( errors > 0 ); } }