Skip to content

Commit

Permalink
Add meta-information to lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
XChy committed Aug 7, 2023
1 parent 688e713 commit f86a9ae
Show file tree
Hide file tree
Showing 13 changed files with 812 additions and 807 deletions.
113 changes: 61 additions & 52 deletions XSharp/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,33 @@ Lexer::Lexer() {}
/// Splits `source` into a sequence of tokens.
///
/// Tokens are pulled one at a time with fetchFrom() until it reports `Eof`.
/// A final `Eof` token carrying the end-of-input span is then appended, so
/// the returned vector is never empty.
///
/// @param source  Text to tokenize.
/// @return All tokens in source order, terminated by an `Eof` token.
/// @throws XSharpError  Propagated from fetchFrom() on an unknown character.
std::vector<Token> Lexer::tokenize(const XString& source)
{
    std::vector<Token> tokens;

    // Local cursor: fetchFrom() advances it (and `span`) past each token, so
    // no lexer member state is needed to track the scan position.
    auto iter = source.begin();
    Span span{.row = 1, .col = 1};

    Token t;
    while ((t = fetchFrom(iter, span)).type != Eof) tokens.push_back(t);

    tokens.push_back({Eof, "Eof", span});
    return tokens;
}

Token Lexer::fetchFrom(XString::const_iterator& iter, Span& span)
{
auto cur = [&iter]() { return *iter; };
auto peek = [&iter]() { return *(iter + 1); };
auto next = [&iter]() { return *(++iter); };
auto cur = [&]() { return *iter; };
auto peek = [&]() { return *(iter + 1); };
auto next = [&]() {
span.col++;
return *(++iter);
};

while (cur().isSpace() || cur() == '\n') {
if (cur() == '\n') {
span.col = 1;
span.row++;
} else {
span.col++;
}
iter++;
next();
}
if (cur() == '\0') return {Eof};

Span spanTemp = span;

if (iter->isDigit()) { // number
XString value;

Expand All @@ -43,26 +45,36 @@ Token Lexer::fetchFrom(XString::const_iterator& iter, Span& span)
next();
next();
// needn't analyze '0x' part
return Token(Integer, hex(iter, span), span);
return Token(Integer, hex(iter, span), spanTemp);
} else if (peek() == 'b' || peek() == 'B') {
next();
next();
return Token(Integer, binary(iter, span), span);
return Token(Integer, binary(iter, span), spanTemp);
} else {
return floatPoint(iter, span);
bool isFP;
XString result = floatPoint(iter, span, isFP);
if (isFP)
return Token(FloatingPoint, result, spanTemp);
else
return Token(Integer, result, spanTemp);
}
} else {
return floatPoint(iter, span);
bool isFP;
XString result = floatPoint(iter, span, isFP);
if (isFP)
return Token(FloatingPoint, result, spanTemp);
else
return Token(Integer, result, spanTemp);
}
} else if (XSharp::isOp(cur())) {
} else if (XSharp::isInOp(cur())) {
XString value;
value.append(cur());
next();
while (XSharp::isOp(cur()) && XSharp::opContains(value + cur())) {
while (XSharp::isInOp(cur()) && XSharp::isParialOp(value + cur())) {
value.append(cur());
next();
}
return Token(Operator, value, span);
return Token(Operator, value, spanTemp);
} else if (cur().isLetter() || cur() == '_') {
XString value;
value.append(cur());
Expand All @@ -72,40 +84,40 @@ Token Lexer::fetchFrom(XString::const_iterator& iter, Span& span)
next();
}
if (value == "true")
return Token(Boolean, value, span);
return Token(Boolean, value, spanTemp);
else if (value == "false")
return Token(Boolean, value, span);
return Token(Boolean, value, spanTemp);
else if (value == "null")
return Token(Null, value, span);
return Token(Null, value, spanTemp);
else if (XSharp::isKeyword(value))
return Token(Keyword, value, span);
return Token(Keyword, value, spanTemp);
else
return Token(Identifier, value, span);
return Token(Identifier, value, spanTemp);

} else if (cur() == ';') {
next();
return Token(SentenceEnd, ";", span);
return Token(SentenceEnd, ";", spanTemp);
} else if (cur() == '(') {
next();
return Token(OpenParen, "(", span);
return Token(OpenParen, "(", spanTemp);
} else if (cur() == ')') {
next();
return Token(CloseParen, ")", span);
return Token(CloseParen, ")", spanTemp);
} else if (cur() == '[') {
next();
return Token(OpenBracket, "[", span);
return Token(OpenBracket, "[", spanTemp);
} else if (cur() == ']') {
next();
return Token(CloseBracket, "]", span);
return Token(CloseBracket, "]", spanTemp);
} else if (cur() == '{') {
next();
return Token(OpenBrace, "{", span);
return Token(OpenBrace, "{", spanTemp);
} else if (cur() == '}') {
next();
return Token(CloseBrace, "}", span);
return Token(CloseBrace, "}", spanTemp);
} else if (cur() == ',') {
next();
return Token(Comma, ",", span);
return Token(Comma, ",", spanTemp);
} else if (cur() == '\'') {
next();
XString value;
Expand All @@ -126,7 +138,7 @@ Token Lexer::fetchFrom(XString::const_iterator& iter, Span& span)
// TODO: Error?
}
// TODO: check count of char
return (Token(Char, value, span));
return (Token(Char, value, spanTemp));
} else if (cur() == '\"') {
next();
XString value;
Expand All @@ -147,19 +159,13 @@ Token Lexer::fetchFrom(XString::const_iterator& iter, Span& span)
// TODO: Error?
}

return Token(String, value, span);
return Token(String, value, spanTemp);
} else if (cur() == '.') {
next();
return Token(Dot, ".", span);
return Token(Dot, ".", spanTemp);
} else if (cur() == ':') {
next();
return Token(Colon, ":", span);
} else if (cur() == '\n' || cur() == '\r') {
next();
span.row++;
span.col = 1;
} else if (cur().isSpace()) {
next();
return Token(Colon, ":", spanTemp);
} else {
throw XSharpError(XString("Unknown char:").append(cur()));
}
Expand All @@ -182,9 +188,12 @@ XString Lexer::hex(CharIter& iter, Span& span)

XString Lexer::binary(CharIter& iter, Span& span)
{
auto cur = [&iter]() { return *iter; };
auto peek = [&iter]() { return *(iter + 1); };
auto next = [&iter]() { return *(++iter); };
auto cur = [&]() { return *iter; };
auto peek = [&]() { return *(iter + 1); };
auto next = [&]() {
span.col++;
return *(++iter);
};

XString result;
while (cur().isDigit()) {
Expand All @@ -194,24 +203,24 @@ XString Lexer::binary(CharIter& iter, Span& span)
return XString::fromInterger(result.toInteger<int64_t>(2), 10);
}

Token Lexer::floatPoint(CharIter& iter, Span& span)
XString Lexer::floatPoint(CharIter& iter, Span& span, bool& isFP)
{
auto cur = [&iter]() { return *iter; };
auto peek = [&iter]() { return *(iter + 1); };
auto next = [&iter]() { return *(++iter); };
auto cur = [&]() { return *iter; };
auto peek = [&]() { return *(iter + 1); };
auto next = [&]() {
span.col++;
return *(++iter);
};

XString result;
bool isDecimal = false;
isFP = false;

while (cur().isDigit() || cur() == '.') {
if (cur() == '.') isDecimal = true;
if (cur() == '.') isFP = true;

result.append(cur());
next();
}

if (isDecimal)
return Token(Decimal, result, span);
else
return Token(Integer, result, span);
return result;
}
4 changes: 1 addition & 3 deletions XSharp/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,12 @@ class XSharp_EXPORT Lexer
bool consumeIs(TokenType type);
void consume();

private:
static Token fetchFrom(CharIter& iter, Span& span);
static XString hex(CharIter& iter, Span& span); // Hexadecimal
static XString binary(CharIter& iter, Span& span); // Binary
static Token floatPoint(CharIter& iter, Span& span); // Floating-point
static XString floatPoint(CharIter& iter, Span& span, bool& isFP); // Floating-point

// the iterator to current Character
CharIter currentIter;
};

// Implements consume/peek
Expand Down
2 changes: 1 addition & 1 deletion XSharp/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ ASTNode* Parser::factor()
ASTNode* factor;
if (current->type == Integer) {
factor = new IntegerNode(current->value.toInteger<int64_t>());
} else if (current->type == Decimal) {
} else if (current->type == FloatingPoint) {
factor = new FPNode(current->value.toDouble());
} else if (current->type == Boolean) {
factor = new BooleanNode(current->value == "true");
Expand Down
16 changes: 7 additions & 9 deletions XSharp/Tokens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ XString Token::dump() const
case TokenType::Integer:
result.append("Integer:");
break;
case TokenType::Decimal:
result.append("DecimalFraction:");
case TokenType::FloatingPoint:
result.append("FloatingPoint:");
break;
case TokenType::Char:
result.append("Char:");
Expand Down Expand Up @@ -119,18 +119,16 @@ bool XSharp::isOperator(const XString &oper)
return operators.contains(oper);
}

/// Tells whether `oper` occurs in at least one entry of `operators`.
///
/// The lexer uses this to decide whether a character can belong to an
/// operator token at all.
///
/// @param oper  Character to look for.
/// @return true if some operator string contains `oper`, false otherwise.
bool XSharp::isInOp(XChar oper)
{
    // NOTE(review): each operator is copied per iteration; binding by
    // `const auto&` would avoid that if XString::contains is const — confirm.
    for (auto operStr : operators) {
        if (operStr.contains(oper)) return true;
    }
    return false;
}

bool XSharp::opContains(const XString &part)
bool XSharp::isParialOp(const XString &part)
{
for (auto operStr : operators) {
if (operStr.subStringIndex(part) != -1) return true;
}
for (auto operStr : operators)
if (operStr.subStringIndex(part) != 0) return true;
return false;
}
6 changes: 3 additions & 3 deletions XSharp/Tokens.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ static std::unordered_set<XString> operators = {
};

bool isOperator(const XString& oper);
bool isOp(XChar oper);
bool isInOp(XChar oper);

bool opContains(const XString& part);
bool isParialOp(const XString& part);

} // namespace XSharp

enum TokenType {
Integer,
Decimal,
FloatingPoint,
Boolean,
Char,
String,
Expand Down
Loading

0 comments on commit f86a9ae

Please sign in to comment.