/
lexical.go
245 lines (231 loc) Β· 4.38 KB
/
lexical.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
package lexical
import (
"fmt"
"strings"
"unicode"
)
// Package-level lexer state.
var (
	// source is the text being tokenized; New stores it and nextChar reads
	// from it.
	source string
	// currentPointer is the index into source that nextChar reads next.
	currentPointer int
	// CurrentLexeme is overwritten every time Token returns, with the text
	// Token collected during that call.
	CurrentLexeme string
)
// TokenError describes a character the lexer could not accept.
type TokenError struct {
	// char is the offending character (rune is an alias of int32).
	char rune
}
// Error renders the error as text, naming the offending character and the
// source string currently installed in the lexer.
//
// The end-of-input marker T_EOF is printed as "EOF". The check is made on the
// character stored in the error itself, not on the lexer's current lookahead,
// so the message does not change if the lexer moves on before Error is called.
func (e *TokenError) Error() string {
	character := string(e.char)
	if e.char == T_EOF {
		character = "EOF"
	}
	return fmt.Sprintf("Unexpected char '%s' with source '%s'", character, source)
}
// throwTokenError builds a *TokenError carrying char. Despite the name it
// does not panic; the error value is simply handed back to the caller.
func throwTokenError(char int32) *TokenError {
	return &TokenError{char: char}
}
// char is the lexer's one-character lookahead: the character most recently
// obtained from nextChar and not yet consumed by Token (rune is an alias of
// int32).
var char rune
// New points the lexer at s: it stores s as the source, moves the read cursor
// to the beginning and loads the first character into the lookahead.
func New(s string) {
	// The state must be in place before nextChar reads from it.
	source, currentPointer = s, 0
	char = nextChar()
}
// Token scans and returns the next token, starting at the current lookahead
// character.
//
// Leading whitespace (spaces, tabs, newlines, ...) is skipped. A letter
// followed by letters, numbers or '_' is classified by lexemeToToken; a run
// of numbers yields T_NUMERIC; text between a pair of single or double quotes
// yields T_LITERAL. At end of input Token returns T_EOF with a nil error.
//
// A non-nil *TokenError is returned for a character that starts no token, for
// '!' that is not followed by '=', and for a literal still open at end of
// input. The token value that accompanies an error carries no meaning.
//
// When Token returns, CurrentLexeme holds the text of the token just scanned:
// the full operator for multi-character operators (">=", "<=", "<>", "!="),
// and the content without the surrounding quotes for literals.
func Token() (uint8, *TokenError) {
	var lexeme string
	defer func() {
		CurrentLexeme = lexeme
	}()

	// Whitespace separates tokens but is never part of one. The explicit EOF
	// test keeps the loop finite whatever value T_EOF has.
	for char != T_EOF && unicode.IsSpace(char) {
		char = nextChar()
	}

	switch {
	case unicode.IsLetter(char):
		// Identifier or reserved word.
		var word strings.Builder
		for unicode.IsLetter(char) || unicode.IsNumber(char) || char == '_' {
			word.WriteRune(char)
			char = nextChar()
		}
		lexeme = word.String()
		return lexemeToToken(lexeme), nil
	case unicode.IsNumber(char):
		var digits strings.Builder
		for unicode.IsNumber(char) {
			digits.WriteRune(char)
			char = nextChar()
		}
		lexeme = digits.String()
		return T_NUMERIC, nil
	}

	// Everything else is decided by the first character. The lexeme starts
	// out as that character and is extended for two-character operators.
	first := char
	lexeme = string(rune(first))
	switch first {
	case '*':
		char = nextChar()
		return T_WILD_CARD, nil
	case '(':
		char = nextChar()
		return T_PARENTH_L, nil
	case ')':
		char = nextChar()
		return T_PARENTH_R, nil
	case ',':
		char = nextChar()
		return T_COMMA, nil
	case ';':
		char = nextChar()
		return T_SEMICOLON, nil
	case '=':
		char = nextChar()
		return T_EQUAL, nil
	case '>':
		char = nextChar()
		if char == '=' {
			lexeme = ">="
			char = nextChar()
			return T_GREATER_OR_EQUAL, nil
		}
		return T_GREATER, nil
	case '<':
		char = nextChar()
		switch char {
		case '=':
			lexeme = "<="
			char = nextChar()
			return T_SMALLER_OR_EQUAL, nil
		case '>':
			lexeme = "<>"
			char = nextChar()
			return T_NOT_EQUAL, nil
		}
		return T_SMALLER, nil
	case '!':
		char = nextChar()
		if char == '=' {
			lexeme = "!="
			char = nextChar()
			return T_NOT_EQUAL, nil
		}
		// A lone '!' is not an operator; report the character that followed.
		return 0, throwTokenError(char)
	case '\'', '"':
		// A literal runs to the next occurrence of the quote that opened it.
		quote := first
		char = nextChar()
		var text strings.Builder
		for char != quote && char != T_EOF {
			text.WriteRune(char)
			char = nextChar()
		}
		lexeme = text.String()
		if char == T_EOF {
			return 0, throwTokenError(char)
		}
		char = nextChar() // step over the closing quote
		return T_LITERAL, nil
	default:
		if char == T_EOF {
			return T_EOF, nil
		}
		return T_FUCK, throwTokenError(char)
	}
}
// keywordTokens pairs each reserved word, spelled as its L_* constant, with
// the token lexemeToToken reports for it.
var keywordTokens = map[string]uint8{
	L_SELECT: T_SELECT,
	L_FROM:   T_FROM,
	L_WHERE:  T_WHERE,
	L_ORDER:  T_ORDER,
	L_BY:     T_BY,
	L_OR:     T_OR,
	L_AND:    T_AND,
	L_LIMIT:  T_LIMIT,
	L_IN:     T_IN,
	L_ASC:    T_ASC,
	L_DESC:   T_DESC,
	L_LIKE:   T_LIKE,
	L_NOT:    T_NOT,
	L_COUNT:  T_COUNT,
}

// lexemeToToken classifies a word-shaped lexeme. The lexeme is lower-cased
// and compared against the L_* reserved words; a match returns that word's
// token, anything else is an identifier (T_ID).
func lexemeToToken(lexeme string) uint8 {
	if token, isKeyword := keywordTokens[strings.ToLower(lexeme)]; isKeyword {
		return token
	}
	return T_ID
}
// nextChar returns the character at the read cursor and moves the cursor past
// it. Once the cursor has reached the end of source it returns T_EOF and
// leaves the cursor where it is.
//
// source is decoded as UTF-8, so a multi-byte character is returned as one
// code point rather than as its individual bytes. A byte that is not valid
// UTF-8 is returned as U+FFFD and consumes exactly one byte.
func nextChar() int32 {
	if currentPointer >= len(source) {
		return T_EOF
	}

	rest := source[currentPointer:]

	// Ranging over a string decodes it rune by rune and yields the byte
	// offset of each rune, so the offset of the second rune is the width of
	// the first. If there is no second rune, the first spans all of rest.
	width := len(rest)
	var first int32
	for offset, r := range rest {
		if offset > 0 {
			width = offset
			break
		}
		first = r
	}

	currentPointer += width
	return first
}
// rewind moves the read cursor back to the start of source by zeroing
// currentPointer. It changes nothing else: source, the lookahead held in char
// and CurrentLexeme all keep whatever values they had, so the lookahead is
// not reloaded until nextChar is called again.
func rewind() {
currentPointer = 0
}