Skip to content

Commit

Permalink
fix: Improved memory usage and Lexer & Parser performance
Browse files Browse the repository at this point in the history
  • Loading branch information
alinalihassan committed May 16, 2022
1 parent 41547ed commit 043ab55
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 105 deletions.
2 changes: 1 addition & 1 deletion src/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ int main(int argc, char **argv) {
if (options->debug) {
print(DEBUG, "TOKENS: \n");
for (const auto &tok: lexer->getTokens())
print("Token: {}\n", tok.Dump(srcMgr));
print("Token: {}\n", tok->Dump(srcMgr));
}

// Parser
Expand Down
34 changes: 17 additions & 17 deletions src/liblesma/Frontend/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@

using namespace lesma;

// Diff hunk: the by-value (`Token`) lines and the pointer (`Token*`) lines of ScanAll are shown interleaved.
// Scans the whole input: keeps appending the result of ScanOne(false) to `tokens` until the last
// element has type EOF_TOKEN, then returns `tokens`.
// The return type is a vector by value, so the caller receives a copy of the member vector.
// NOTE(review): in the pointer version the elements are raw pointers (ScanOne's EOF path uses `new`);
// no code that deletes them is visible in this view - confirm who owns the tokens.
std::vector<Token> Lexer::ScanAll() {
while (tokens.empty() || tokens.back().type != TokenType::EOF_TOKEN)
std::vector<Token*> Lexer::ScanAll() {
while (tokens.empty() || tokens.back()->type != TokenType::EOF_TOKEN)
tokens.push_back(ScanOne(false));
return tokens;
}

Token Lexer::ScanOne(bool continuation) {
Token *Lexer::ScanOne(bool continuation) {
if (IsAtEnd())
return Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
return new Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
ResetTokenBeg();
char c = Advance();

Expand Down Expand Up @@ -135,7 +135,7 @@ Token Lexer::ScanOne(bool continuation) {
line++;
col = 1;
if (!continuation && level_ == 0)
tokens.push_back(AddToken({TokenType::NEWLINE, "NEWLINE", llvm::SMRange{begin_loc, loc}}));
tokens.push_back(AddToken(new Token{TokenType::NEWLINE, "NEWLINE", llvm::SMRange{begin_loc, loc}}));
HandleIndentation(continuation);
return ScanOne(false);
case '"':
Expand Down Expand Up @@ -189,7 +189,7 @@ bool Lexer::HandleIndentation(bool continuation) {
if (continuation || level_ != 0 || c == '#' || c == '\n' || c == '\r') {
if (c == '#' || c == '\n') {
// If this line is a commented line or an empty line, don't emit NewLine
if (!tokens.empty() && tokens.back().type == TokenType::NEWLINE) {
if (!tokens.empty() && tokens.back()->type == TokenType::NEWLINE) {
tokens.pop_back();
}
}
Expand Down Expand Up @@ -232,20 +232,20 @@ bool Lexer::HandleIndentation(bool continuation) {
}

while (changes != 0) {
tokens.push_back(AddToken({changes > 0 ? TokenType::INDENT : TokenType::DEDENT, changes > 0 ? "INDENT" : "DEDENT", llvm::SMRange{begin_loc, loc}}));
tokens.push_back(AddToken(new Token{changes > 0 ? TokenType::INDENT : TokenType::DEDENT, changes > 0 ? "INDENT" : "DEDENT", llvm::SMRange{begin_loc, loc}}));
changes += changes > 0 ? -1 : 1;
}
return true;
}

// Diff hunk: the by-value and the pointer-returning lines of AddToken(TokenType) are shown interleaved.
// Builds a token of the given type whose text is the source characters from begin_loc up to loc
// and whose range is {begin_loc, loc}, then calls ResetTokenBeg() and returns the token.
// NOTE(review): the pointer version allocates with a raw `new`; no matching delete is visible
// in this view - confirm which component frees the token.
// TODO: Could possibly make it more efficient
Token Lexer::AddToken(TokenType type) {
auto ret = Token(type, std::string(begin_loc.getPointer(), loc.getPointer()), llvm::SMRange{begin_loc, loc});
Token *Lexer::AddToken(TokenType type) {
auto ret = new Token(type, std::string(begin_loc.getPointer(), loc.getPointer()), llvm::SMRange{begin_loc, loc});
ResetTokenBeg();
return ret;
}

// Diff hunk: the by-value and the pointer signature of this overload are shown interleaved.
// Overload for a token the caller has already constructed: calls ResetTokenBeg() and
// returns `tok` unchanged.
Token Lexer::AddToken(Token tok) {
Token *Lexer::AddToken(Token *tok) {
ResetTokenBeg();
return tok;
}
Expand Down Expand Up @@ -280,7 +280,7 @@ char Lexer::Peek(int offset) {
return *(loc.getPointer() + offset);
}

Token Lexer::AddStringToken() {
Token *Lexer::AddStringToken() {
std::string string;

while (Peek() != '"' && !IsAtEnd()) {
Expand Down Expand Up @@ -338,11 +338,11 @@ Token Lexer::AddStringToken() {
// Skip the closing ".
Advance();

auto ret = Token(TokenType::STRING, string, llvm::SMRange{begin_loc, loc});
auto ret = new Token(TokenType::STRING, string, llvm::SMRange{begin_loc, loc});
ResetTokenBeg();
return ret;
}
Token Lexer::AddNumToken() {
Token *Lexer::AddNumToken() {
while (IsDigit(Peek())) Advance();

// Look for a fractional part.
Expand All @@ -358,20 +358,20 @@ Token Lexer::AddNumToken() {
}
}

// Diff hunk: the by-value and the pointer-returning lines of GetLastToken are shown interleaved.
// Returns the last element of `tokens` (tokens.end()[-1]) when the vector is non-empty;
// otherwise returns a freshly built EOF token with range {begin_loc, loc}.
// NOTE(review): in the pointer version the fallback token is created with `new` and is not
// stored in `tokens`, so the caller is the only holder of that pointer. The visible caller
// (AddIdentifierToken) passes it straight to Token::GetIdentifierType - this looks like a leak
// on every call made while `tokens` is empty; confirm and consider a non-allocating fallback.
Token Lexer::GetLastToken() {
Token *Lexer::GetLastToken() {
if (!tokens.empty())
return tokens.end()[-1];
else
return Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
return new Token{TokenType::EOF_TOKEN, "EOF", llvm::SMRange{begin_loc, loc}};
}

Token Lexer::AddIdentifierToken() {
Token *Lexer::AddIdentifierToken() {
while (IsAlphaNumeric(Peek())) Advance();

auto tok = AddToken(Token::GetIdentifierType(std::string(begin_loc.getPointer(), loc.getPointer()), GetLastToken()));

// If it's an 'else if' multiword keyword, remove the last token (which is an 'else' in this case)
if (tok.type == TokenType::ELSE_IF)
if (tok->type == TokenType::ELSE_IF)
tokens.pop_back();

return tok;
Expand Down
20 changes: 10 additions & 10 deletions src/liblesma/Frontend/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ namespace lesma {
curPtr(curBuffer->getBufferStart()), begin_loc(llvm::SMLoc::getFromPointer(curPtr)), loc(llvm::SMLoc::getFromPointer(curPtr)), srcMgr(srcMgr) {
}

std::vector<Token> ScanAll();
Token ScanOne(bool continuation = false);
std::vector<Token> getTokens() { return tokens; };
std::vector<Token*> ScanAll();
Token *ScanOne(bool continuation = false);
std::vector<Token*> getTokens() { return tokens; };

void Reset() {
Lexer tmp(srcMgr);
Expand All @@ -34,18 +34,18 @@ namespace lesma {

char Peek(int offset = 0);

Token AddStringToken();
Token *AddStringToken();

static bool IsDigit(char c) { return c >= '0' && c <= '9'; }

static bool IsAlpha(char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; }

static bool IsAlphaNumeric(char c) { return IsAlpha(c) || IsDigit(c); }

Token AddNumToken();
Token *AddNumToken();

Token AddToken(TokenType type);
Token AddToken(Token tok);
Token *AddToken(TokenType type);
Token *AddToken(Token *tok);

void Error(const std::string &msg) const;

Expand All @@ -55,8 +55,8 @@ namespace lesma {

char Advance();

Token GetLastToken();
Token AddIdentifierToken();
Token *GetLastToken();
Token *AddIdentifierToken();

void HandleWhitespace(char c);
bool HandleIndentation(bool continuation);
Expand All @@ -68,7 +68,7 @@ namespace lesma {
unsigned int col = 1;
llvm::SMLoc begin_loc;
llvm::SMLoc loc;
std::vector<Token> tokens;
std::vector<Token*> tokens;
std::shared_ptr<llvm::SourceMgr> srcMgr;

std::optional<char> first_indent_char;
Expand Down
Loading

0 comments on commit 043ab55

Please sign in to comment.