diff --git a/src/lalr/ErrorPolicy.cpp b/src/lalr/ErrorPolicy.cpp index d458f0d..793ac8e 100644 --- a/src/lalr/ErrorPolicy.cpp +++ b/src/lalr/ErrorPolicy.cpp @@ -33,9 +33,9 @@ ErrorPolicy::~ErrorPolicy() // @param ... // Arguments as described by *format*. */ -void ErrorPolicy::lalr_error( int line, int /*column*/, int /*error*/, const char* format, va_list args ) +void ErrorPolicy::lalr_error( int line, int column, int /*error*/, const char* format, va_list args ) { - fprintf( stderr, "lalr (%d): ERROR: ", line ); + fprintf( stderr, "lalr (%d:%d): ERROR: ", line, column ); vfprintf( stderr, format, args ); fprintf( stderr, "\n" ); } diff --git a/src/lalr/Grammar.cpp b/src/lalr/Grammar.cpp index 7adc17a..1a146e9 100644 --- a/src/lalr/Grammar.cpp +++ b/src/lalr/Grammar.cpp @@ -39,10 +39,10 @@ Grammar::Grammar() , error_symbol_( nullptr ) , whitespace_symbol_( nullptr ) { - start_symbol_ = add_symbol( ".start", 0, LEXEME_NULL, SYMBOL_NON_TERMINAL ); - end_symbol_ = add_symbol( ".end", 0, LEXEME_NULL, SYMBOL_END ); - error_symbol_ = add_symbol( "error", 0, LEXEME_NULL, SYMBOL_NULL ); - whitespace_symbol_ = add_symbol( ".whitespace", 0, LEXEME_NULL, SYMBOL_NULL ); + start_symbol_ = add_symbol( ".start", 0, 0, LEXEME_NULL, SYMBOL_NON_TERMINAL ); + end_symbol_ = add_symbol( ".end", 0, 0, LEXEME_NULL, SYMBOL_END ); + error_symbol_ = add_symbol( "error", 0, 0, LEXEME_NULL, SYMBOL_NULL ); + whitespace_symbol_ = add_symbol( ".whitespace", 0, 0, LEXEME_NULL, SYMBOL_NULL ); } Grammar::~Grammar() @@ -153,14 +153,14 @@ Grammar& Grammar::precedence() return *this; } -Grammar& Grammar::production( const char* identifier, int line ) +Grammar& Grammar::production( const char* identifier, int line, int column ) { LALR_ASSERT( identifier ); associativity_ = ASSOCIATE_NULL; active_whitespace_directive_ = false; active_precedence_directive_ = false; active_production_ = nullptr; - active_symbol_ = non_terminal_symbol( identifier, line ); + active_symbol_ = non_terminal_symbol( 
identifier, line, column ); return *this; } @@ -175,44 +175,44 @@ Grammar& Grammar::end_production() return *this; } -Grammar& Grammar::end_expression( int line ) +Grammar& Grammar::end_expression(int line, int column) { LALR_ASSERT( line >= 1 ); // If there is an active symbol but no active production then an empty - // production is being specified (the nil action marks the end of a + // production is being specified (the nil action marks the end of a // production for which no symbols have been specified). if ( active_symbol_ ) { if ( !active_production_ ) { - active_production_ = add_production( active_symbol_, line ); + active_production_ = add_production( active_symbol_, line, column ); } } active_production_ = nullptr; return *this; } -Grammar& Grammar::error( int line ) +Grammar& Grammar::error( int line, int column ) { LALR_ASSERT( line >= 1 ); if ( associativity_ != ASSOCIATE_NULL ) { GrammarSymbol* symbol = error_symbol(); - symbol->set_associativity( associativity_ ); - symbol->set_precedence( precedence_ ); + symbol->set_associativity( associativity_ ); + symbol->set_precedence( precedence_ ); } else if ( active_symbol_ ) { if ( !active_production_ ) { - active_production_ = add_production( active_symbol_, line ); + active_production_ = add_production( active_symbol_, line, column ); } - active_production_->append_symbol( error_symbol() ); + active_production_->append_symbol( error_symbol() ); } return *this; } -Grammar& Grammar::action( const char* identifier, int line ) +Grammar& Grammar::action(const char* identifier, int line, int column) { LALR_ASSERT( identifier ); LALR_ASSERT( line >= 1 ); @@ -221,7 +221,7 @@ Grammar& Grammar::action( const char* identifier, int line ) { if ( !active_production_ ) { - active_production_ = add_production( active_symbol_, line ); + active_production_ = add_production(active_symbol_, line, column ); } active_production_->set_action( add_action(identifier) ); active_production_ = nullptr; @@ -229,7 +229,7 @@ 
Grammar& Grammar::action( const char* identifier, int line ) return *this; } -Grammar& Grammar::literal( const char* literal, int line ) +Grammar& Grammar::literal(const char* literal, int line, int column) { LALR_ASSERT( literal ); LALR_ASSERT( line >= 0 ); @@ -240,7 +240,7 @@ Grammar& Grammar::literal( const char* literal, int line ) } else if ( associativity_ != ASSOCIATE_NULL ) { - GrammarSymbol* symbol = literal_symbol( literal, line ); + GrammarSymbol* symbol = literal_symbol( literal, line, column ); symbol->set_associativity( associativity_ ); symbol->set_precedence( precedence_ ); } @@ -248,24 +248,24 @@ Grammar& Grammar::literal( const char* literal, int line ) { if ( !active_production_ ) { - active_production_ = add_production( active_symbol_, line ); - } + active_production_ = add_production( active_symbol_, line, column ); + } if ( active_precedence_directive_ ) { - GrammarSymbol* symbol = literal_symbol( literal, line ); + GrammarSymbol* symbol = literal_symbol( literal, line, column ); active_production_->set_precedence_symbol( symbol ); symbol->set_referenced_in_precedence_directive( true ); active_precedence_directive_ = false; } else { - active_production_->append_symbol( literal_symbol(literal, line) ); + active_production_->append_symbol( literal_symbol(literal, line, column) ); } } return *this; } -Grammar& Grammar::regex( const char* regex, int line ) +Grammar& Grammar::regex( const char* regex, int line, int column ) { LALR_ASSERT( regex ); LALR_ASSERT( line >= 0 ); @@ -276,7 +276,7 @@ Grammar& Grammar::regex( const char* regex, int line ) } else if ( associativity_ != ASSOCIATE_NULL ) { - GrammarSymbol* symbol = regex_symbol( regex, line ); + GrammarSymbol* symbol = regex_symbol( regex, line, column ); symbol->set_associativity( associativity_ ); symbol->set_precedence( precedence_ ); } @@ -284,31 +284,31 @@ Grammar& Grammar::regex( const char* regex, int line ) { if ( !active_production_ ) { - active_production_ = add_production( 
active_symbol_, line ); + active_production_ = add_production( active_symbol_, line, column ); } if ( active_precedence_directive_ ) { - GrammarSymbol* symbol = regex_symbol( regex, line ); + GrammarSymbol* symbol = regex_symbol( regex, line, column ); symbol->set_referenced_in_precedence_directive( true ); active_production_->set_precedence_symbol( symbol ); active_precedence_directive_ = false; } else { - active_production_->append_symbol( regex_symbol(regex, line) ); + active_production_->append_symbol( regex_symbol(regex, line, column) ); } } return *this; } -Grammar& Grammar::identifier( const char* identifier, int line ) +Grammar& Grammar::identifier( const char* identifier, int line, int column ) { LALR_ASSERT( identifier ); LALR_ASSERT( line >= 0 ); LALR_ASSERT( active_symbol_ || associativity_ != ASSOCIATE_NULL ); if ( associativity_ != ASSOCIATE_NULL ) { - GrammarSymbol* symbol = non_terminal_symbol( identifier, line ); + GrammarSymbol* symbol = non_terminal_symbol( identifier, line, column ); symbol->set_associativity( associativity_ ); symbol->set_precedence( precedence_ ); } @@ -316,45 +316,46 @@ Grammar& Grammar::identifier( const char* identifier, int line ) { if ( !active_production_ ) { - active_production_ = add_production( active_symbol_, line ); + active_production_ = add_production( active_symbol_, line, column ); } if ( active_precedence_directive_ ) { - GrammarSymbol* symbol = non_terminal_symbol( identifier, line ); + GrammarSymbol* symbol = non_terminal_symbol( identifier, line, column ); symbol->set_referenced_in_precedence_directive( true ); active_production_->set_precedence_symbol( symbol ); active_precedence_directive_ = false; } else { - active_production_->append_symbol( non_terminal_symbol(identifier, line) ); + GrammarSymbol* symbol = non_terminal_symbol( identifier, line, column ); + active_production_->append_symbol( symbol ); } } return *this; } -GrammarSymbol* Grammar::literal_symbol( const char* lexeme, int line ) 
+GrammarSymbol* Grammar::literal_symbol( const char* lexeme, int line, int column ) { LALR_ASSERT( lexeme ); LALR_ASSERT( line >= 0 ); - return add_symbol( lexeme, line, LEXEME_LITERAL, SYMBOL_TERMINAL ); + return add_symbol(lexeme, line, column, LEXEME_LITERAL, SYMBOL_TERMINAL ); } -GrammarSymbol* Grammar::regex_symbol( const char* lexeme, int line ) +GrammarSymbol* Grammar::regex_symbol( const char* lexeme, int line, int column ) { LALR_ASSERT( lexeme ); LALR_ASSERT( line >= 0 ); - return add_symbol( lexeme, line, LEXEME_REGULAR_EXPRESSION, SYMBOL_TERMINAL ); + return add_symbol(lexeme, line, column, LEXEME_REGULAR_EXPRESSION, SYMBOL_TERMINAL ); } -GrammarSymbol* Grammar::non_terminal_symbol( const char* lexeme, int line ) +GrammarSymbol* Grammar::non_terminal_symbol( const char* lexeme, int line, int column ) { LALR_ASSERT( lexeme ); LALR_ASSERT( line >= 0 ); - return add_symbol( lexeme, line, LEXEME_NULL, SYMBOL_NON_TERMINAL ); + return add_symbol(lexeme, line, column, LEXEME_NULL, SYMBOL_NON_TERMINAL ); } -GrammarSymbol* Grammar::add_symbol( const char* lexeme, int line, LexemeType lexeme_type, SymbolType symbol_type ) +GrammarSymbol* Grammar::add_symbol( const char* lexeme, int line, int column, LexemeType lexeme_type, SymbolType symbol_type ) { LALR_ASSERT( lexeme ); LALR_ASSERT( line >= 0 ); @@ -362,11 +363,12 @@ GrammarSymbol* Grammar::add_symbol( const char* lexeme, int line, LexemeType lex while ( i != symbols_.end() && !(*i)->matches(lexeme, symbol_type) ) { ++i; - } + } if ( i == symbols_.end() ) { unique_ptr symbol( new GrammarSymbol(lexeme) ); symbol->set_line( line ); + symbol->set_column( column ); symbol->set_lexeme_type( lexeme_type ); symbol->set_symbol_type( symbol_type ); symbols_.push_back( std::move(symbol) ); @@ -379,7 +381,7 @@ GrammarSymbol* Grammar::add_symbol( const char* lexeme, int line, LexemeType lex return symbol; } -GrammarProduction* Grammar::add_production( GrammarSymbol* symbol, int line ) +GrammarProduction* 
Grammar::add_production( GrammarSymbol* symbol, int line, int column ) { LALR_ASSERT( symbol ); LALR_ASSERT( line > 0 ); @@ -392,7 +394,7 @@ GrammarProduction* Grammar::add_production( GrammarSymbol* symbol, int line ) productions_.push_back( std::move(production) ); } - unique_ptr production( new GrammarProduction(int(productions_.size()), symbol, line, -1, nullptr) ); + unique_ptr production( new GrammarProduction(int(productions_.size()), symbol, line, column, nullptr) ); symbol->append_production( production.get() ); productions_.push_back( std::move(production) ); return productions_.back().get(); diff --git a/src/lalr/Grammar.hpp b/src/lalr/Grammar.hpp index c76648e..090d1b1 100644 --- a/src/lalr/Grammar.hpp +++ b/src/lalr/Grammar.hpp @@ -56,21 +56,21 @@ class Grammar Grammar& none( int line ); Grammar& whitespace(); Grammar& precedence(); - Grammar& production( const char* identifier, int line ); + Grammar& production( const char* identifier, int line, int column ); Grammar& end_production(); - Grammar& end_expression( int line ); - Grammar& error( int line ); - Grammar& action( const char* identifier, int line ); - Grammar& literal( const char* literal, int line ); - Grammar& regex( const char* regex, int line ); - Grammar& identifier( const char* identifier, int line ); + Grammar& end_expression( int line, int column ); + Grammar& error( int line, int column ); + Grammar& action( const char* identifier, int line, int column ); + Grammar& literal( const char* literal, int line, int column ); + Grammar& regex( const char* regex, int line, int column ); + Grammar& identifier( const char* identifier, int line, int column ); private: - GrammarSymbol* literal_symbol( const char* lexeme, int line ); - GrammarSymbol* regex_symbol( const char* lexeme, int line ); - GrammarSymbol* non_terminal_symbol( const char* lexeme, int line ); - GrammarSymbol* add_symbol( const char* lexeme, int line, LexemeType lexeme_type, SymbolType symbol_type ); - GrammarProduction* 
add_production( GrammarSymbol* symbol, int line ); + GrammarSymbol* literal_symbol( const char* lexeme, int line , int column ); + GrammarSymbol* regex_symbol( const char* lexeme, int line , int column ); + GrammarSymbol* non_terminal_symbol( const char* lexeme, int line , int column ); + GrammarSymbol* add_symbol( const char* lexeme, int line, int column, LexemeType lexeme_type, SymbolType symbol_type ); + GrammarProduction* add_production( GrammarSymbol* symbol, int line , int column ); GrammarAction* add_action( const char* id ); }; diff --git a/src/lalr/GrammarParser.cpp b/src/lalr/GrammarParser.cpp index a1a53e6..7c369f3 100644 --- a/src/lalr/GrammarParser.cpp +++ b/src/lalr/GrammarParser.cpp @@ -19,6 +19,7 @@ GrammarParser::GrammarParser() , grammar_( nullptr ) , position_( nullptr ) , end_( nullptr ) +, line_position_( nullptr ) , line_( 1 ) , lexeme_() , errors_( 0 ) @@ -34,13 +35,13 @@ int GrammarParser::parse( const char* start, const char* finish, ErrorPolicy* er LALR_ASSERT( grammar ); error_policy_ = error_policy; grammar_ = grammar; - position_ = start; + line_position_ = position_ = start; end_ = finish; line_ = 1; errors_ = 0; if ( !match_grammar() ) { - error( 1, LALR_ERROR_SYNTAX, "parsing grammar failed" ); + error( LALR_ERROR_SYNTAX, "parsing grammar failed" ); } return errors_; } @@ -63,12 +64,12 @@ bool GrammarParser::match_statements() while ( match_statement() ) { } - return true; + return true; } bool GrammarParser::match_statement() { - return + return match_associativity_statement() || match_whitespace_statement() || match_production_statement() @@ -93,7 +94,7 @@ bool GrammarParser::match_whitespace_statement() grammar_->whitespace(); if ( match_regex() ) { - grammar_->regex( lexeme_.c_str(), line_ ); + grammar_->regex( lexeme_.c_str(), line_, column() ); } expect( ";" ); return true; @@ -105,7 +106,7 @@ bool GrammarParser::match_production_statement() { if ( match_identifier() ) { - grammar_->production( lexeme_.c_str(), line_ ); + 
grammar_->production( lexeme_.c_str(), line_, column() ); expect( ":" ); match_expressions(); expect( ";" ); @@ -127,22 +128,22 @@ bool GrammarParser::match_symbol() { if ( match_error() ) { - grammar_->error( line_ ); + grammar_->error( line_, column() ); return true; } else if ( match_literal() ) { - grammar_->literal( lexeme_.c_str(), line_ ); + grammar_->literal( lexeme_.c_str(), line_, column() ); return true; } else if ( match_regex() ) { - grammar_->regex( lexeme_.c_str(), line_ ); + grammar_->regex( lexeme_.c_str(), line_, column() ); return true; } else if ( match_identifier() ) { - grammar_->identifier( lexeme_.c_str(), line_ ); + grammar_->identifier( lexeme_.c_str(), line_, column() ); return true; } return false; @@ -202,12 +203,12 @@ bool GrammarParser::match_action() { if ( match_identifier() ) { - grammar_->action( lexeme_.c_str(), line_ ); + grammar_->action( lexeme_.c_str(), line_, column() ); } expect( "]" ); return true; } - grammar_->end_expression( line_ ); + grammar_->end_expression( line_, column() ); return false; } @@ -235,7 +236,7 @@ bool GrammarParser::match_literal() expect( "'" ); return true; } - error( line_, LALR_ERROR_UNTERMINATED_LITERAL, "unterminated literal" ); + error( LALR_ERROR_UNTERMINATED_LITERAL, "unterminated literal" ); return false; } return false; @@ -297,11 +298,12 @@ bool GrammarParser::match_whitespace() if ( is_new_line(position) ) { ++line_; + line_position_ = position + 1; /* The new line begins after the newline character, as in new_line(). */ } ++position; } position_ = position; - return true; + return true; } return false; } @@ -348,7 +350,7 @@ bool GrammarParser::match_block_comment() } } position_ = position; - return true; + return true; } return false; } @@ -388,11 +390,11 @@ bool GrammarParser::expect( const char* lexeme ) return true; } - position_ = end_; - error( line_, LALR_ERROR_SYNTAX, "expected '%s' not found", lexeme ); + error( LALR_ERROR_SYNTAX, "expected '%s' not found", lexeme ); /* Report first: error() reads position_ through column(). */ + position_ = end_; return false; } -void GrammarParser::error( int line, int error, const char* format, ... 
) +void GrammarParser::error( int error, const char* format, ... ) { LALR_ASSERT( format ); ++errors_; @@ -400,7 +402,7 @@ void GrammarParser::error( int line, int error, const char* format, ... ) { va_list args; va_start( args, format ); - error_policy_->lalr_error( line, 0, error, format, args ); + error_policy_->lalr_error( line_, column(), error, format, args ); va_end( args ); } } @@ -417,6 +419,7 @@ const char* GrammarParser::new_line( const char* position ) ++position; } ++line_; + line_position_ = position; } else if ( *position == '\r' ) { @@ -426,6 +429,7 @@ const char* GrammarParser::new_line( const char* position ) ++position; } ++line_; + line_position_ = position; } } return position; @@ -435,3 +439,10 @@ bool GrammarParser::is_new_line( const char* position ) { return *position == '\n' || *position == '\r'; } + +int GrammarParser::column() +{ + LALR_ASSERT( position_ ); + LALR_ASSERT( position_ >= line_position_ ); + return int(position_ - line_position_); +} diff --git a/src/lalr/GrammarParser.hpp b/src/lalr/GrammarParser.hpp index b77d364..142e1d8 100644 --- a/src/lalr/GrammarParser.hpp +++ b/src/lalr/GrammarParser.hpp @@ -16,6 +16,7 @@ class GrammarParser Grammar* grammar_; ///< Grammar to build from parsing input. const char* position_; ///< Current input position. const char* end_; ///< One past the last character of input to parse. + const char* line_position_; ///< Position that column() is measured from; reset when parsing starts and each time line_ is incremented. int line_; ///< Current line number. std::string lexeme_; ///< Currently parsed lexeme. int errors_; ///< The number of errors that occured during parsing and generation. @@ -50,9 +51,10 @@ class GrammarParser bool match( const char* lexeme ); bool match_without_skipping_whitespace( const char* lexeme ); bool expect( const char* lexeme ); - void error( int line, int error, const char* format, ... ); + void error( int error, const char* format, ... 
); const char* new_line( const char* position ); static bool is_new_line( const char* position ); + int column(); }; } diff --git a/src/lalr/GrammarSymbol.cpp b/src/lalr/GrammarSymbol.cpp index 779379b..487f0e2 100644 --- a/src/lalr/GrammarSymbol.cpp +++ b/src/lalr/GrammarSymbol.cpp @@ -17,6 +17,7 @@ GrammarSymbol::GrammarSymbol( const char* lexeme ) , associativity_( ASSOCIATE_NULL ) , precedence_( 0 ) , line_( 0 ) +, column_( 0 ) , index_( -1 ) , nullable_( false ) , referenced_in_precedence_directive_( false ) @@ -112,6 +113,12 @@ void GrammarSymbol::set_line( int line ) line_ = line; } +void GrammarSymbol::set_column( int column ) +{ + LALR_ASSERT( column >= 0 ); + column_ = column; +} + void GrammarSymbol::set_index( int index ) { LALR_ASSERT( index >= 0 ); diff --git a/src/lalr/GrammarSymbol.hpp b/src/lalr/GrammarSymbol.hpp index 57fd79d..e8d109e 100644 --- a/src/lalr/GrammarSymbol.hpp +++ b/src/lalr/GrammarSymbol.hpp @@ -23,6 +23,7 @@ class GrammarSymbol Associativity associativity_; ///< The associativity of this symbol. int precedence_; ///< The precedence of this symbol. int line_; ///< The line that this symbol is defined on. + int column_; ///< The column recorded for this symbol by set_column(); 0 until then. int index_; ///< The index of this symbol among all symbols. bool nullable_; ///< True if this symbol is nullable otherwise false. bool referenced_in_precedence_directive_; ///< True if this symbol is referenced by a %precedence directive. 
@@ -42,6 +43,7 @@ class GrammarSymbol inline Associativity associativity() const; inline int precedence() const; inline int line() const; + inline int column() const; inline int index() const; inline bool nullable() const; inline bool referenced_in_precedence_directive() const; @@ -60,6 +62,7 @@ class GrammarSymbol void set_associativity( Associativity associativity ); void set_precedence( int precedence ); void set_line( int line ); + void set_column( int column ); void set_index( int index ); void set_nullable( bool nullable ); void set_referenced_in_precedence_directive( bool referenced_in_precedence_directive ); diff --git a/src/lalr/GrammarSymbol.ipp b/src/lalr/GrammarSymbol.ipp index 86c6e79..5cec4d9 100644 --- a/src/lalr/GrammarSymbol.ipp +++ b/src/lalr/GrammarSymbol.ipp @@ -52,6 +52,11 @@ int GrammarSymbol::line() const return line_; } +int GrammarSymbol::column() const +{ + return column_; +} + int GrammarSymbol::index() const { return index_;