fix: Improved memory usage and Lexer & Parser performance

alinalihassan committed on May 16, 2022 · commit 043ab55 · 1 parent 41547ed

Showing 7 changed files with 105 additions and 105 deletions.
diff --git a/src/cli/main.cpp b/src/cli/main.cpp
@@ -74,7 +74,7 @@ int main(int argc, char **argv) {
         if (options->debug) {
             print(DEBUG, "TOKENS: \n");
             for (const auto &tok: lexer->getTokens())
-                print("Token: {}\n", tok.Dump(srcMgr));
+                print("Token: {}\n", tok->Dump(srcMgr));
         }
 
         // Parser

diff --git a/src/liblesma/Frontend/Lexer.cpp b/src/liblesma/Frontend/Lexer.cpp
@@ -2,15 +2,15 @@
 
 using namespace lesma;
 
-std::vector<Token> Lexer::ScanAll() {
-    while (tokens.empty() || tokens.back().type != TokenType::EOF_TOKEN)
+std::vector<Token*> Lexer::ScanAll() {
+    while (tokens.empty() || tokens.back()->type != TokenType::EOF_TOKEN)
         tokens.push_back(ScanOne(false));
     return tokens;
 }
 
-Token Lexer::ScanOne(bool continuation) {
+Token *Lexer::ScanOne(bool continuation) {
     if (IsAtEnd())
-        return Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
+        return new Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
     ResetTokenBeg();
     char c = Advance();
 
@@ -135,7 +135,7 @@ Token Lexer::ScanOne(bool continuation) {
             line++;
             col = 1;
             if (!continuation && level_ == 0)
-                tokens.push_back(AddToken({TokenType::NEWLINE, "NEWLINE", llvm::SMRange{begin_loc, loc}}));
+                tokens.push_back(AddToken(new Token{TokenType::NEWLINE, "NEWLINE", llvm::SMRange{begin_loc, loc}}));
             HandleIndentation(continuation);
             return ScanOne(false);
         case '"':
@@ -189,7 +189,7 @@ bool Lexer::HandleIndentation(bool continuation) {
     if (continuation || level_ != 0 || c == '#' || c == '\n' || c == '\r') {
         if (c == '#' || c == '\n') {
             // If this line is a commented line or an empty line, don't emit NewLine
-            if (!tokens.empty() && tokens.back().type == TokenType::NEWLINE) {
+            if (!tokens.empty() && tokens.back()->type == TokenType::NEWLINE) {
                 tokens.pop_back();
             }
         }
@@ -232,20 +232,20 @@ bool Lexer::HandleIndentation(bool continuation) {
     }
 
     while (changes != 0) {
-        tokens.push_back(AddToken({changes > 0 ? TokenType::INDENT : TokenType::DEDENT, changes > 0 ? "INDENT" : "DEDENT", llvm::SMRange{begin_loc, loc}}));
+        tokens.push_back(AddToken(new Token{changes > 0 ? TokenType::INDENT : TokenType::DEDENT, changes > 0 ? "INDENT" : "DEDENT", llvm::SMRange{begin_loc, loc}}));
         changes += changes > 0 ? -1 : 1;
     }
     return true;
 }
 
 // TODO: Could possibly make it more efficient
-Token Lexer::AddToken(TokenType type) {
-    auto ret = Token(type, std::string(begin_loc.getPointer(), loc.getPointer()), llvm::SMRange{begin_loc, loc});
+Token *Lexer::AddToken(TokenType type) {
+    auto ret = new Token(type, std::string(begin_loc.getPointer(), loc.getPointer()), llvm::SMRange{begin_loc, loc});
     ResetTokenBeg();
     return ret;
 }
 
-Token Lexer::AddToken(Token tok) {
+Token *Lexer::AddToken(Token *tok) {
     ResetTokenBeg();
     return tok;
 }
@@ -280,7 +280,7 @@ char Lexer::Peek(int offset) {
     return *(loc.getPointer() + offset);
 }
 
-Token Lexer::AddStringToken() {
+Token *Lexer::AddStringToken() {
     std::string string;
 
     while (Peek() != '"' && !IsAtEnd()) {
@@ -338,11 +338,11 @@ Token Lexer::AddStringToken() {
     // Skip the closing ".
     Advance();
 
-    auto ret = Token(TokenType::STRING, string, llvm::SMRange{begin_loc, loc});
+    auto ret = new Token(TokenType::STRING, string, llvm::SMRange{begin_loc, loc});
     ResetTokenBeg();
     return ret;
 }
-Token Lexer::AddNumToken() {
+Token *Lexer::AddNumToken() {
     while (IsDigit(Peek())) Advance();
 
     // Look for a fractional part.
@@ -358,20 +358,20 @@ Token Lexer::AddNumToken() {
     }
 }
 
-Token Lexer::GetLastToken() {
+Token *Lexer::GetLastToken() {
     if (!tokens.empty())
         return tokens.end()[-1];
     else
-        return Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
+        return new Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
 }
 
-Token Lexer::AddIdentifierToken() {
+Token *Lexer::AddIdentifierToken() {
     while (IsAlphaNumeric(Peek())) Advance();
 
     auto tok = AddToken(Token::GetIdentifierType(std::string(begin_loc.getPointer(), loc.getPointer()), GetLastToken()));
 
     // If it's an 'else if' multiword keyword, remove the last token (which is an 'else' in this case)
-    if (tok.type == TokenType::ELSE_IF)
+    if (tok->type == TokenType::ELSE_IF)
         tokens.pop_back();
 
     return tok;

diff --git a/src/liblesma/Frontend/Lexer.h b/src/liblesma/Frontend/Lexer.h
@@ -20,9 +20,9 @@ namespace lesma {
               curPtr(curBuffer->getBufferStart()), begin_loc(llvm::SMLoc::getFromPointer(curPtr)), loc(llvm::SMLoc::getFromPointer(curPtr)), srcMgr(srcMgr) {
         }
 
-        std::vector<Token> ScanAll();
-        Token ScanOne(bool continuation = false);
-        std::vector<Token> getTokens() { return tokens; };
+        std::vector<Token*> ScanAll();
+        Token *ScanOne(bool continuation = false);
+        std::vector<Token*> getTokens() { return tokens; };
 
         void Reset() {
             Lexer tmp(srcMgr);
@@ -34,18 +34,18 @@ namespace lesma {
 
         char Peek(int offset = 0);
 
-        Token AddStringToken();
+        Token *AddStringToken();
 
         static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
 
         static bool IsAlpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; }
 
         static bool IsAlphaNumeric(char c) { return IsAlpha(c) || IsDigit(c); }
 
-        Token AddNumToken();
+        Token *AddNumToken();
 
-        Token AddToken(TokenType type);
-        Token AddToken(Token tok);
+        Token *AddToken(TokenType type);
+        Token *AddToken(Token *tok);
 
         void Error(const std::string &msg) const;
 
@@ -55,8 +55,8 @@ namespace lesma {
 
         char Advance();
 
-        Token GetLastToken();
-        Token AddIdentifierToken();
+        Token *GetLastToken();
+        Token *AddIdentifierToken();
 
         void HandleWhitespace(char c);
         bool HandleIndentation(bool continuation);
@@ -68,7 +68,7 @@ namespace lesma {
         unsigned int col = 1;
         llvm::SMLoc begin_loc;
         llvm::SMLoc loc;
-        std::vector<Token> tokens;
+        std::vector<Token*> tokens;
         std::shared_ptr<llvm::SourceMgr> srcMgr;
 
         std::optional<char> first_indent_char;