diff options
author | Aaron Seigo <aseigo@kde.org> | 2014-12-14 12:00:05 +0100 |
---|---|---|
committer | Aaron Seigo <aseigo@kde.org> | 2014-12-14 12:00:05 +0100 |
commit | 7cc25005b8c46d1fa783d33def2c6923e8ef8469 (patch) | |
tree | 64fa59d17af29838396cf37b912b3babd885e5dd /common/unqlite/jx9_lex.c | |
parent | bfc32f265e8ad72823db960fed371d72596003b7 (diff) | |
parent | a6ed70495f9f3ecb21c26860dda16aadcdc91c3a (diff) | |
download | sink-7cc25005b8c46d1fa783d33def2c6923e8ef8469.tar.gz sink-7cc25005b8c46d1fa783d33def2c6923e8ef8469.zip |
Merge branch 'unqlite'
Diffstat (limited to 'common/unqlite/jx9_lex.c')
-rw-r--r-- | common/unqlite/jx9_lex.c | 758 |
1 files changed, 758 insertions, 0 deletions
diff --git a/common/unqlite/jx9_lex.c b/common/unqlite/jx9_lex.c new file mode 100644 index 0000000..7799950 --- /dev/null +++ b/common/unqlite/jx9_lex.c | |||
@@ -0,0 +1,758 @@ | |||
1 | /* | ||
2 | * Symisc JX9: A Highly Efficient Embeddable Scripting Engine Based on JSON. | ||
3 | * Copyright (C) 2012-2013, Symisc Systems http://jx9.symisc.net/ | ||
4 | * Version 1.7.2 | ||
5 | * For information on licensing, redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES | ||
6 | * please contact Symisc Systems via: | ||
7 | * legal@symisc.net | ||
8 | * licensing@symisc.net | ||
9 | * contact@symisc.net | ||
10 | * or visit: | ||
11 | * http://jx9.symisc.net/ | ||
12 | */ | ||
13 | /* $SymiscID: lex.c v1.0 FreeBSD 2012-12-09 00:19 stable <chm@symisc.net> $ */ | ||
14 | #ifndef JX9_AMALGAMATION | ||
15 | #include "jx9Int.h" | ||
16 | #endif | ||
17 | /* This file implements a thread-safe and fully reentrant lexical analyzer for the Jx9 programming language */ | |
18 | /* Forward declarations */ | ||
19 | static sxu32 keywordCode(const char *z,int n); | ||
20 | static sxi32 LexExtractNowdoc(SyStream *pStream,SyToken *pToken); | ||
21 | /* | ||
22 | * Tokenize a raw jx9 input. | ||
23 | * Get a single low-level token from the input file. Update the stream pointer so that | ||
24 | * it points to the first character beyond the extracted token. | ||
25 | */ | ||
26 | static sxi32 jx9TokenizeInput(SyStream *pStream,SyToken *pToken,void *pUserData,void *pCtxData) | ||
27 | { | ||
28 | SyString *pStr; | ||
29 | sxi32 rc; | ||
30 | /* Ignore leading white spaces */ | ||
31 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisSpace(pStream->zText[0]) ){ | ||
32 | /* Advance the stream cursor */ | ||
33 | if( pStream->zText[0] == '\n' ){ | ||
34 | /* Update line counter */ | ||
35 | pStream->nLine++; | ||
36 | } | ||
37 | pStream->zText++; | ||
38 | } | ||
39 | if( pStream->zText >= pStream->zEnd ){ | ||
40 | /* End of input reached */ | ||
41 | return SXERR_EOF; | ||
42 | } | ||
43 | /* Record token starting position and line */ | ||
44 | pToken->nLine = pStream->nLine; | ||
45 | pToken->pUserData = 0; | ||
46 | pStr = &pToken->sData; | ||
47 | SyStringInitFromBuf(pStr, pStream->zText, 0); | ||
48 | if( pStream->zText[0] >= 0xc0 || SyisAlpha(pStream->zText[0]) || pStream->zText[0] == '_' ){ | ||
49 | /* The following code fragment is taken verbatim from the xPP source tree. | ||
50 | * xPP is a modern embeddable macro processor with advanced features useful for | ||
51 | * application seeking for a production quality, ready to use macro processor. | ||
52 | * xPP is a widely used library developed and maintened by Symisc Systems. | ||
53 | * You can reach the xPP home page by following this link: | ||
54 | * http://xpp.symisc.net/ | ||
55 | */ | ||
56 | const unsigned char *zIn; | ||
57 | sxu32 nKeyword; | ||
58 | /* Isolate UTF-8 or alphanumeric stream */ | ||
59 | if( pStream->zText[0] < 0xc0 ){ | ||
60 | pStream->zText++; | ||
61 | } | ||
62 | for(;;){ | ||
63 | zIn = pStream->zText; | ||
64 | if( zIn[0] >= 0xc0 ){ | ||
65 | zIn++; | ||
66 | /* UTF-8 stream */ | ||
67 | while( zIn < pStream->zEnd && ((zIn[0] & 0xc0) == 0x80) ){ | ||
68 | zIn++; | ||
69 | } | ||
70 | } | ||
71 | /* Skip alphanumeric stream */ | ||
72 | while( zIn < pStream->zEnd && zIn[0] < 0xc0 && (SyisAlphaNum(zIn[0]) || zIn[0] == '_') ){ | ||
73 | zIn++; | ||
74 | } | ||
75 | if( zIn == pStream->zText ){ | ||
76 | /* Not an UTF-8 or alphanumeric stream */ | ||
77 | break; | ||
78 | } | ||
79 | /* Synchronize pointers */ | ||
80 | pStream->zText = zIn; | ||
81 | } | ||
82 | /* Record token length */ | ||
83 | pStr->nByte = (sxu32)((const char *)pStream->zText-pStr->zString); | ||
84 | nKeyword = keywordCode(pStr->zString, (int)pStr->nByte); | ||
85 | if( nKeyword != JX9_TK_ID ){ | ||
86 | /* We are dealing with a keyword [i.e: if, function, CREATE, ...], save the keyword ID */ | ||
87 | pToken->nType = JX9_TK_KEYWORD; | ||
88 | pToken->pUserData = SX_INT_TO_PTR(nKeyword); | ||
89 | }else{ | ||
90 | /* A simple identifier */ | ||
91 | pToken->nType = JX9_TK_ID; | ||
92 | } | ||
93 | }else{ | ||
94 | sxi32 c; | ||
95 | /* Non-alpha stream */ | ||
96 | if( pStream->zText[0] == '#' || | ||
97 | ( pStream->zText[0] == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '/') ){ | ||
98 | pStream->zText++; | ||
99 | /* Inline comments */ | ||
100 | while( pStream->zText < pStream->zEnd && pStream->zText[0] != '\n' ){ | ||
101 | pStream->zText++; | ||
102 | } | ||
103 | /* Tell the upper-layer to ignore this token */ | ||
104 | return SXERR_CONTINUE; | ||
105 | }else if( pStream->zText[0] == '/' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '*' ){ | ||
106 | pStream->zText += 2; | ||
107 | /* Block comment */ | ||
108 | while( pStream->zText < pStream->zEnd ){ | ||
109 | if( pStream->zText[0] == '*' ){ | ||
110 | if( &pStream->zText[1] >= pStream->zEnd || pStream->zText[1] == '/' ){ | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | if( pStream->zText[0] == '\n' ){ | ||
115 | pStream->nLine++; | ||
116 | } | ||
117 | pStream->zText++; | ||
118 | } | ||
119 | pStream->zText += 2; | ||
120 | /* Tell the upper-layer to ignore this token */ | ||
121 | return SXERR_CONTINUE; | ||
122 | }else if( SyisDigit(pStream->zText[0]) ){ | ||
123 | pStream->zText++; | ||
124 | /* Decimal digit stream */ | ||
125 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0]) ){ | ||
126 | pStream->zText++; | ||
127 | } | ||
128 | /* Mark the token as integer until we encounter a real number */ | ||
129 | pToken->nType = JX9_TK_INTEGER; | ||
130 | if( pStream->zText < pStream->zEnd ){ | ||
131 | c = pStream->zText[0]; | ||
132 | if( c == '.' ){ | ||
133 | /* Real number */ | ||
134 | pStream->zText++; | ||
135 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0]) ){ | ||
136 | pStream->zText++; | ||
137 | } | ||
138 | if( pStream->zText < pStream->zEnd ){ | ||
139 | c = pStream->zText[0]; | ||
140 | if( c=='e' || c=='E' ){ | ||
141 | pStream->zText++; | ||
142 | if( pStream->zText < pStream->zEnd ){ | ||
143 | c = pStream->zText[0]; | ||
144 | if( (c =='+' || c=='-') && &pStream->zText[1] < pStream->zEnd && | ||
145 | pStream->zText[1] < 0xc0 && SyisDigit(pStream->zText[1]) ){ | ||
146 | pStream->zText++; | ||
147 | } | ||
148 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0]) ){ | ||
149 | pStream->zText++; | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | } | ||
154 | pToken->nType = JX9_TK_REAL; | ||
155 | }else if( c=='e' || c=='E' ){ | ||
156 | SXUNUSED(pUserData); /* Prevent compiler warning */ | ||
157 | SXUNUSED(pCtxData); | ||
158 | pStream->zText++; | ||
159 | if( pStream->zText < pStream->zEnd ){ | ||
160 | c = pStream->zText[0]; | ||
161 | if( (c =='+' || c=='-') && &pStream->zText[1] < pStream->zEnd && | ||
162 | pStream->zText[1] < 0xc0 && SyisDigit(pStream->zText[1]) ){ | ||
163 | pStream->zText++; | ||
164 | } | ||
165 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisDigit(pStream->zText[0]) ){ | ||
166 | pStream->zText++; | ||
167 | } | ||
168 | } | ||
169 | pToken->nType = JX9_TK_REAL; | ||
170 | }else if( c == 'x' || c == 'X' ){ | ||
171 | /* Hex digit stream */ | ||
172 | pStream->zText++; | ||
173 | while( pStream->zText < pStream->zEnd && pStream->zText[0] < 0xc0 && SyisHex(pStream->zText[0]) ){ | ||
174 | pStream->zText++; | ||
175 | } | ||
176 | }else if(c == 'b' || c == 'B' ){ | ||
177 | /* Binary digit stream */ | ||
178 | pStream->zText++; | ||
179 | while( pStream->zText < pStream->zEnd && (pStream->zText[0] == '0' || pStream->zText[0] == '1') ){ | ||
180 | pStream->zText++; | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | /* Record token length */ | ||
185 | pStr->nByte = (sxu32)((const char *)pStream->zText-pStr->zString); | ||
186 | return SXRET_OK; | ||
187 | } | ||
188 | c = pStream->zText[0]; | ||
189 | pStream->zText++; /* Advance the stream cursor */ | ||
190 | /* Assume we are dealing with an operator*/ | ||
191 | pToken->nType = JX9_TK_OP; | ||
192 | switch(c){ | ||
193 | case '$': pToken->nType = JX9_TK_DOLLAR; break; | ||
194 | case '{': pToken->nType = JX9_TK_OCB; break; | ||
195 | case '}': pToken->nType = JX9_TK_CCB; break; | ||
196 | case '(': pToken->nType = JX9_TK_LPAREN; break; | ||
197 | case '[': pToken->nType |= JX9_TK_OSB; break; /* Bitwise operation here, since the square bracket token '[' | ||
198 | * is a potential operator [i.e: subscripting] */ | ||
199 | case ']': pToken->nType = JX9_TK_CSB; break; | ||
200 | case ')': { | ||
201 | SySet *pTokSet = pStream->pSet; | ||
202 | /* Assemble type cast operators [i.e: (int), (float), (bool)...] */ | ||
203 | if( pTokSet->nUsed >= 2 ){ | ||
204 | SyToken *pTmp; | ||
205 | /* Peek the last recongnized token */ | ||
206 | pTmp = (SyToken *)SySetPeek(pTokSet); | ||
207 | if( pTmp->nType & JX9_TK_KEYWORD ){ | ||
208 | sxi32 nID = SX_PTR_TO_INT(pTmp->pUserData); | ||
209 | if( (sxu32)nID & (JX9_TKWRD_INT|JX9_TKWRD_FLOAT|JX9_TKWRD_STRING|JX9_TKWRD_BOOL) ){ | ||
210 | pTmp = (SyToken *)SySetAt(pTokSet, pTokSet->nUsed - 2); | ||
211 | if( pTmp->nType & JX9_TK_LPAREN ){ | ||
212 | /* Merge the three tokens '(' 'TYPE' ')' into a single one */ | ||
213 | const char * zTypeCast = "(int)"; | ||
214 | if( nID & JX9_TKWRD_FLOAT ){ | ||
215 | zTypeCast = "(float)"; | ||
216 | }else if( nID & JX9_TKWRD_BOOL ){ | ||
217 | zTypeCast = "(bool)"; | ||
218 | }else if( nID & JX9_TKWRD_STRING ){ | ||
219 | zTypeCast = "(string)"; | ||
220 | } | ||
221 | /* Reflect the change */ | ||
222 | pToken->nType = JX9_TK_OP; | ||
223 | SyStringInitFromBuf(&pToken->sData, zTypeCast, SyStrlen(zTypeCast)); | ||
224 | /* Save the instance associated with the type cast operator */ | ||
225 | pToken->pUserData = (void *)jx9ExprExtractOperator(&pToken->sData, 0); | ||
226 | /* Remove the two previous tokens */ | ||
227 | pTokSet->nUsed -= 2; | ||
228 | return SXRET_OK; | ||
229 | } | ||
230 | } | ||
231 | } | ||
232 | } | ||
233 | pToken->nType = JX9_TK_RPAREN; | ||
234 | break; | ||
235 | } | ||
236 | case '\'':{ | ||
237 | /* Single quoted string */ | ||
238 | pStr->zString++; | ||
239 | while( pStream->zText < pStream->zEnd ){ | ||
240 | if( pStream->zText[0] == '\'' ){ | ||
241 | if( pStream->zText[-1] != '\\' ){ | ||
242 | break; | ||
243 | }else{ | ||
244 | const unsigned char *zPtr = &pStream->zText[-2]; | ||
245 | sxi32 i = 1; | ||
246 | while( zPtr > pStream->zInput && zPtr[0] == '\\' ){ | ||
247 | zPtr--; | ||
248 | i++; | ||
249 | } | ||
250 | if((i&1)==0){ | ||
251 | break; | ||
252 | } | ||
253 | } | ||
254 | } | ||
255 | if( pStream->zText[0] == '\n' ){ | ||
256 | pStream->nLine++; | ||
257 | } | ||
258 | pStream->zText++; | ||
259 | } | ||
260 | /* Record token length and type */ | ||
261 | pStr->nByte = (sxu32)((const char *)pStream->zText-pStr->zString); | ||
262 | pToken->nType = JX9_TK_SSTR; | ||
263 | /* Jump the trailing single quote */ | ||
264 | pStream->zText++; | ||
265 | return SXRET_OK; | ||
266 | } | ||
267 | case '"':{ | ||
268 | sxi32 iNest; | ||
269 | /* Double quoted string */ | ||
270 | pStr->zString++; | ||
271 | while( pStream->zText < pStream->zEnd ){ | ||
272 | if( pStream->zText[0] == '{' && &pStream->zText[1] < pStream->zEnd && pStream->zText[1] == '$'){ | ||
273 | iNest = 1; | ||
274 | pStream->zText++; | ||
275 | /* TICKET 1433-40: Hnadle braces'{}' in double quoted string where everything is allowed */ | ||
276 | while(pStream->zText < pStream->zEnd ){ | ||
277 | if( pStream->zText[0] == '{' ){ | ||
278 | iNest++; | ||
279 | }else if (pStream->zText[0] == '}' ){ | ||
280 | iNest--; | ||
281 | if( iNest <= 0 ){ | ||
282 | pStream->zText++; | ||
283 | break; | ||
284 | } | ||
285 | }else if( pStream->zText[0] == '\n' ){ | ||
286 | pStream->nLine++; | ||
287 | } | ||
288 | pStream->zText++; | ||
289 | } | ||
290 | if( pStream->zText >= pStream->zEnd ){ | ||
291 | break; | ||
292 | } | ||
293 | } | ||
294 | if( pStream->zText[0] == '"' ){ | ||
295 | if( pStream->zText[-1] != '\\' ){ | ||
296 | break; | ||
297 | }else{ | ||
298 | const unsigned char *zPtr = &pStream->zText[-2]; | ||
299 | sxi32 i = 1; | ||
300 | while( zPtr > pStream->zInput && zPtr[0] == '\\' ){ | ||
301 | zPtr--; | ||
302 | i++; | ||
303 | } | ||
304 | if((i&1)==0){ | ||
305 | break; | ||
306 | } | ||
307 | } | ||
308 | } | ||
309 | if( pStream->zText[0] == '\n' ){ | ||
310 | pStream->nLine++; | ||
311 | } | ||
312 | pStream->zText++; | ||
313 | } | ||
314 | /* Record token length and type */ | ||
315 | pStr->nByte = (sxu32)((const char *)pStream->zText-pStr->zString); | ||
316 | pToken->nType = JX9_TK_DSTR; | ||
317 | /* Jump the trailing quote */ | ||
318 | pStream->zText++; | ||
319 | return SXRET_OK; | ||
320 | } | ||
321 | case ':': | ||
322 | pToken->nType = JX9_TK_COLON; /* Single colon */ | ||
323 | break; | ||
324 | case ',': pToken->nType |= JX9_TK_COMMA; break; /* Comma is also an operator */ | ||
325 | case ';': pToken->nType = JX9_TK_SEMI; break; | ||
326 | /* Handle combined operators [i.e: +=, ===, !=== ...] */ | ||
327 | case '=': | ||
328 | pToken->nType |= JX9_TK_EQUAL; | ||
329 | if( pStream->zText < pStream->zEnd ){ | ||
330 | if( pStream->zText[0] == '=' ){ | ||
331 | pToken->nType &= ~JX9_TK_EQUAL; | ||
332 | /* Current operator: == */ | ||
333 | pStream->zText++; | ||
334 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
335 | /* Current operator: === */ | ||
336 | pStream->zText++; | ||
337 | } | ||
338 | } | ||
339 | } | ||
340 | break; | ||
341 | case '!': | ||
342 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
343 | /* Current operator: != */ | ||
344 | pStream->zText++; | ||
345 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
346 | /* Current operator: !== */ | ||
347 | pStream->zText++; | ||
348 | } | ||
349 | } | ||
350 | break; | ||
351 | case '&': | ||
352 | pToken->nType |= JX9_TK_AMPER; | ||
353 | if( pStream->zText < pStream->zEnd ){ | ||
354 | if( pStream->zText[0] == '&' ){ | ||
355 | pToken->nType &= ~JX9_TK_AMPER; | ||
356 | /* Current operator: && */ | ||
357 | pStream->zText++; | ||
358 | }else if( pStream->zText[0] == '=' ){ | ||
359 | pToken->nType &= ~JX9_TK_AMPER; | ||
360 | /* Current operator: &= */ | ||
361 | pStream->zText++; | ||
362 | } | ||
363 | } | ||
364 | case '.': | ||
365 | if( pStream->zText < pStream->zEnd && (pStream->zText[0] == '.' || pStream->zText[0] == '=') ){ | ||
366 | /* Concatenation operator: '..' or '.=' */ | ||
367 | pStream->zText++; | ||
368 | } | ||
369 | break; | ||
370 | case '|': | ||
371 | if( pStream->zText < pStream->zEnd ){ | ||
372 | if( pStream->zText[0] == '|' ){ | ||
373 | /* Current operator: || */ | ||
374 | pStream->zText++; | ||
375 | }else if( pStream->zText[0] == '=' ){ | ||
376 | /* Current operator: |= */ | ||
377 | pStream->zText++; | ||
378 | } | ||
379 | } | ||
380 | break; | ||
381 | case '+': | ||
382 | if( pStream->zText < pStream->zEnd ){ | ||
383 | if( pStream->zText[0] == '+' ){ | ||
384 | /* Current operator: ++ */ | ||
385 | pStream->zText++; | ||
386 | }else if( pStream->zText[0] == '=' ){ | ||
387 | /* Current operator: += */ | ||
388 | pStream->zText++; | ||
389 | } | ||
390 | } | ||
391 | break; | ||
392 | case '-': | ||
393 | if( pStream->zText < pStream->zEnd ){ | ||
394 | if( pStream->zText[0] == '-' ){ | ||
395 | /* Current operator: -- */ | ||
396 | pStream->zText++; | ||
397 | }else if( pStream->zText[0] == '=' ){ | ||
398 | /* Current operator: -= */ | ||
399 | pStream->zText++; | ||
400 | }else if( pStream->zText[0] == '>' ){ | ||
401 | /* Current operator: -> */ | ||
402 | pStream->zText++; | ||
403 | } | ||
404 | } | ||
405 | break; | ||
406 | case '*': | ||
407 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
408 | /* Current operator: *= */ | ||
409 | pStream->zText++; | ||
410 | } | ||
411 | break; | ||
412 | case '/': | ||
413 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
414 | /* Current operator: /= */ | ||
415 | pStream->zText++; | ||
416 | } | ||
417 | break; | ||
418 | case '%': | ||
419 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
420 | /* Current operator: %= */ | ||
421 | pStream->zText++; | ||
422 | } | ||
423 | break; | ||
424 | case '^': | ||
425 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
426 | /* Current operator: ^= */ | ||
427 | pStream->zText++; | ||
428 | } | ||
429 | break; | ||
430 | case '<': | ||
431 | if( pStream->zText < pStream->zEnd ){ | ||
432 | if( pStream->zText[0] == '<' ){ | ||
433 | /* Current operator: << */ | ||
434 | pStream->zText++; | ||
435 | if( pStream->zText < pStream->zEnd ){ | ||
436 | if( pStream->zText[0] == '=' ){ | ||
437 | /* Current operator: <<= */ | ||
438 | pStream->zText++; | ||
439 | }else if( pStream->zText[0] == '<' ){ | ||
440 | /* Current Token: <<< */ | ||
441 | pStream->zText++; | ||
442 | /* This may be the beginning of a Heredoc/Nowdoc string, try to delimit it */ | ||
443 | rc = LexExtractNowdoc(&(*pStream), &(*pToken)); | ||
444 | if( rc == SXRET_OK ){ | ||
445 | /* Here/Now doc successfuly extracted */ | ||
446 | return SXRET_OK; | ||
447 | } | ||
448 | } | ||
449 | } | ||
450 | }else if( pStream->zText[0] == '>' ){ | ||
451 | /* Current operator: <> */ | ||
452 | pStream->zText++; | ||
453 | }else if( pStream->zText[0] == '=' ){ | ||
454 | /* Current operator: <= */ | ||
455 | pStream->zText++; | ||
456 | } | ||
457 | } | ||
458 | break; | ||
459 | case '>': | ||
460 | if( pStream->zText < pStream->zEnd ){ | ||
461 | if( pStream->zText[0] == '>' ){ | ||
462 | /* Current operator: >> */ | ||
463 | pStream->zText++; | ||
464 | if( pStream->zText < pStream->zEnd && pStream->zText[0] == '=' ){ | ||
465 | /* Current operator: >>= */ | ||
466 | pStream->zText++; | ||
467 | } | ||
468 | }else if( pStream->zText[0] == '=' ){ | ||
469 | /* Current operator: >= */ | ||
470 | pStream->zText++; | ||
471 | } | ||
472 | } | ||
473 | break; | ||
474 | default: | ||
475 | break; | ||
476 | } | ||
477 | if( pStr->nByte <= 0 ){ | ||
478 | /* Record token length */ | ||
479 | pStr->nByte = (sxu32)((const char *)pStream->zText-pStr->zString); | ||
480 | } | ||
481 | if( pToken->nType & JX9_TK_OP ){ | ||
482 | const jx9_expr_op *pOp; | ||
483 | /* Check if the extracted token is an operator */ | ||
484 | pOp = jx9ExprExtractOperator(pStr, (SyToken *)SySetPeek(pStream->pSet)); | ||
485 | if( pOp == 0 ){ | ||
486 | /* Not an operator */ | ||
487 | pToken->nType &= ~JX9_TK_OP; | ||
488 | if( pToken->nType <= 0 ){ | ||
489 | pToken->nType = JX9_TK_OTHER; | ||
490 | } | ||
491 | }else{ | ||
492 | /* Save the instance associated with this operator for later processing */ | ||
493 | pToken->pUserData = (void *)pOp; | ||
494 | } | ||
495 | } | ||
496 | } | ||
497 | /* Tell the upper-layer to save the extracted token for later processing */ | ||
498 | return SXRET_OK; | ||
499 | } | ||
500 | /***** This file contains automatically generated code ****** | ||
501 | ** | ||
502 | ** The code in this file has been automatically generated by | ||
503 | ** | ||
504 | ** $Header: /sqlite/sqlite/tool/mkkeywordhash.c,v 1.38 2011/12/21 01:00:46 <chm@symisc.net> $ | ||
505 | ** | ||
506 | ** The code in this file implements a function that determines whether | ||
507 | ** or not a given identifier is really a JX9 keyword. The same thing | ||
508 | ** might be implemented more directly using a hand-written hash table. | ||
509 | ** But by using this automatically generated code, the size of the code | ||
510 | ** is substantially reduced. This is important for embedded applications | ||
511 | ** on platforms with limited memory. | ||
512 | */ | ||
513 | /* Hash score: 35 */ | ||
514 | static sxu32 keywordCode(const char *z, int n) | ||
515 | { | ||
516 | /* zText[] encodes 188 bytes of keywords in 128 bytes */ | ||
517 | /* printegereturnconstaticaselseifloatincludefaultDIEXITcontinue */ | ||
518 | /* diewhileASPRINTbooleanbreakforeachfunctionimportstringswitch */ | ||
519 | /* uplink */ | ||
520 | static const char zText[127] = { | ||
521 | 'p','r','i','n','t','e','g','e','r','e','t','u','r','n','c','o','n','s', | ||
522 | 't','a','t','i','c','a','s','e','l','s','e','i','f','l','o','a','t','i', | ||
523 | 'n','c','l','u','d','e','f','a','u','l','t','D','I','E','X','I','T','c', | ||
524 | 'o','n','t','i','n','u','e','d','i','e','w','h','i','l','e','A','S','P', | ||
525 | 'R','I','N','T','b','o','o','l','e','a','n','b','r','e','a','k','f','o', | ||
526 | 'r','e','a','c','h','f','u','n','c','t','i','o','n','i','m','p','o','r', | ||
527 | 't','s','t','r','i','n','g','s','w','i','t','c','h','u','p','l','i','n', | ||
528 | 'k', | ||
529 | }; | ||
530 | static const unsigned char aHash[59] = { | ||
531 | 0, 0, 0, 0, 15, 0, 30, 0, 0, 2, 19, 18, 0, | ||
532 | 0, 10, 3, 12, 0, 28, 29, 23, 0, 13, 22, 0, 0, | ||
533 | 14, 24, 25, 31, 11, 0, 0, 0, 0, 1, 5, 0, 0, | ||
534 | 20, 0, 27, 9, 0, 0, 0, 8, 0, 0, 26, 6, 0, | ||
535 | 0, 17, 0, 0, 0, 0, 0, | ||
536 | }; | ||
537 | static const unsigned char aNext[31] = { | ||
538 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, | ||
539 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 21, 7, | ||
540 | 0, 0, 0, 0, 0, | ||
541 | }; | ||
542 | static const unsigned char aLen[31] = { | ||
543 | 5, 7, 3, 6, 5, 6, 4, 2, 6, 4, 2, 5, 7, | ||
544 | 7, 3, 4, 8, 3, 5, 2, 5, 4, 7, 5, 3, 7, | ||
545 | 8, 6, 6, 6, 6, | ||
546 | }; | ||
547 | static const sxu16 aOffset[31] = { | ||
548 | 0, 2, 2, 8, 14, 17, 22, 23, 25, 25, 29, 30, 35, | ||
549 | 40, 47, 49, 53, 61, 64, 69, 71, 76, 76, 83, 88, 88, | ||
550 | 95, 103, 109, 115, 121, | ||
551 | }; | ||
552 | static const sxu32 aCode[31] = { | ||
553 | JX9_TKWRD_PRINT, JX9_TKWRD_INT, JX9_TKWRD_INT, JX9_TKWRD_RETURN, JX9_TKWRD_CONST, | ||
554 | JX9_TKWRD_STATIC, JX9_TKWRD_CASE, JX9_TKWRD_AS, JX9_TKWRD_ELIF, JX9_TKWRD_ELSE, | ||
555 | JX9_TKWRD_IF, JX9_TKWRD_FLOAT, JX9_TKWRD_INCLUDE, JX9_TKWRD_DEFAULT, JX9_TKWRD_DIE, | ||
556 | JX9_TKWRD_EXIT, JX9_TKWRD_CONTINUE, JX9_TKWRD_DIE, JX9_TKWRD_WHILE, JX9_TKWRD_AS, | ||
557 | JX9_TKWRD_PRINT, JX9_TKWRD_BOOL, JX9_TKWRD_BOOL, JX9_TKWRD_BREAK, JX9_TKWRD_FOR, | ||
558 | JX9_TKWRD_FOREACH, JX9_TKWRD_FUNCTION, JX9_TKWRD_IMPORT, JX9_TKWRD_STRING, JX9_TKWRD_SWITCH, | ||
559 | JX9_TKWRD_UPLINK, | ||
560 | }; | ||
561 | int h, i; | ||
562 | if( n<2 ) return JX9_TK_ID; | ||
563 | h = (((int)z[0]*4) ^ ((int)z[n-1]*3) ^ n) % 59; | ||
564 | for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){ | ||
565 | if( (int)aLen[i]==n && SyMemcmp(&zText[aOffset[i]],z,n)==0 ){ | ||
566 | /* JX9_TKWRD_PRINT */ | ||
567 | /* JX9_TKWRD_INT */ | ||
568 | /* JX9_TKWRD_INT */ | ||
569 | /* JX9_TKWRD_RETURN */ | ||
570 | /* JX9_TKWRD_CONST */ | ||
571 | /* JX9_TKWRD_STATIC */ | ||
572 | /* JX9_TKWRD_CASE */ | ||
573 | /* JX9_TKWRD_AS */ | ||
574 | /* JX9_TKWRD_ELIF */ | ||
575 | /* JX9_TKWRD_ELSE */ | ||
576 | /* JX9_TKWRD_IF */ | ||
577 | /* JX9_TKWRD_FLOAT */ | ||
578 | /* JX9_TKWRD_INCLUDE */ | ||
579 | /* JX9_TKWRD_DEFAULT */ | ||
580 | /* JX9_TKWRD_DIE */ | ||
581 | /* JX9_TKWRD_EXIT */ | ||
582 | /* JX9_TKWRD_CONTINUE */ | ||
583 | /* JX9_TKWRD_DIE */ | ||
584 | /* JX9_TKWRD_WHILE */ | ||
585 | /* JX9_TKWRD_AS */ | ||
586 | /* JX9_TKWRD_PRINT */ | ||
587 | /* JX9_TKWRD_BOOL */ | ||
588 | /* JX9_TKWRD_BOOL */ | ||
589 | /* JX9_TKWRD_BREAK */ | ||
590 | /* JX9_TKWRD_FOR */ | ||
591 | /* JX9_TKWRD_FOREACH */ | ||
592 | /* JX9_TKWRD_FUNCTION */ | ||
593 | /* JX9_TKWRD_IMPORT */ | ||
594 | /* JX9_TKWRD_STRING */ | ||
595 | /* JX9_TKWRD_SWITCH */ | ||
596 | /* JX9_TKWRD_UPLINK */ | ||
597 | return aCode[i]; | ||
598 | } | ||
599 | } | ||
600 | return JX9_TK_ID; | ||
601 | } | ||
602 | /* | ||
603 | * Extract a heredoc/nowdoc text from a raw JX9 input. | ||
604 | * According to the JX9 language reference manual: | ||
605 | * A third way to delimit strings is the heredoc syntax: <<<. After this operator, an identifier | ||
606 | * is provided, then a newline. The string itself follows, and then the same identifier again | ||
607 | * to close the quotation. | ||
608 | * The closing identifier must begin in the first column of the line. Also, the identifier must | ||
609 | * follow the same naming rules as any other label in JX9: it must contain only alphanumeric | ||
610 | * characters and underscores, and must start with a non-digit character or underscore. | ||
611 | * Heredoc text behaves just like a double-quoted string, without the double quotes. | ||
612 | * This means that quotes in a heredoc do not need to be escaped, but the escape codes listed | ||
613 | * above can still be used. Variables are expanded, but the same care must be taken when expressing | ||
614 | * complex variables inside a heredoc as with strings. | ||
615 | * Nowdocs are to single-quoted strings what heredocs are to double-quoted strings. | ||
616 | * A nowdoc is specified similarly to a heredoc, but no parsing is done inside a nowdoc. | ||
617 | * The construct is ideal for embedding JX9 code or other large blocks of text without the need | ||
618 | * for escaping. It shares some features in common with the SGML <![CDATA[ ]]> construct, in that | ||
619 | * it declares a block of text which is not for parsing. | ||
620 | * A nowdoc is identified with the same <<< sequence used for heredocs, but the identifier which follows | ||
621 | * is enclosed in single quotes, e.g. <<<'EOT'. All the rules for heredoc identifiers also apply to nowdoc | ||
622 | * identifiers, especially those regarding the appearance of the closing identifier. | ||
623 | */ | ||
624 | static sxi32 LexExtractNowdoc(SyStream *pStream, SyToken *pToken) | ||
625 | { | ||
626 | const unsigned char *zIn = pStream->zText; | ||
627 | const unsigned char *zEnd = pStream->zEnd; | ||
628 | const unsigned char *zPtr; | ||
629 | SyString sDelim; | ||
630 | SyString sStr; | ||
631 | /* Jump leading white spaces */ | ||
632 | while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){ | ||
633 | zIn++; | ||
634 | } | ||
635 | if( zIn >= zEnd ){ | ||
636 | /* A simple symbol, return immediately */ | ||
637 | return SXERR_CONTINUE; | ||
638 | } | ||
639 | if( zIn[0] == '\'' || zIn[0] == '"' ){ | ||
640 | zIn++; | ||
641 | } | ||
642 | if( zIn[0] < 0xc0 && !SyisAlphaNum(zIn[0]) && zIn[0] != '_' ){ | ||
643 | /* Invalid delimiter, return immediately */ | ||
644 | return SXERR_CONTINUE; | ||
645 | } | ||
646 | /* Isolate the identifier */ | ||
647 | sDelim.zString = (const char *)zIn; | ||
648 | for(;;){ | ||
649 | zPtr = zIn; | ||
650 | /* Skip alphanumeric stream */ | ||
651 | while( zPtr < zEnd && zPtr[0] < 0xc0 && (SyisAlphaNum(zPtr[0]) || zPtr[0] == '_') ){ | ||
652 | zPtr++; | ||
653 | } | ||
654 | if( zPtr < zEnd && zPtr[0] >= 0xc0 ){ | ||
655 | zPtr++; | ||
656 | /* UTF-8 stream */ | ||
657 | while( zPtr < zEnd && ((zPtr[0] & 0xc0) == 0x80) ){ | ||
658 | zPtr++; | ||
659 | } | ||
660 | } | ||
661 | if( zPtr == zIn ){ | ||
662 | /* Not an UTF-8 or alphanumeric stream */ | ||
663 | break; | ||
664 | } | ||
665 | /* Synchronize pointers */ | ||
666 | zIn = zPtr; | ||
667 | } | ||
668 | /* Get the identifier length */ | ||
669 | sDelim.nByte = (sxu32)((const char *)zIn-sDelim.zString); | ||
670 | if( zIn[0] == '"' || zIn[0] == '\'' ){ | ||
671 | /* Jump the trailing single quote */ | ||
672 | zIn++; | ||
673 | } | ||
674 | /* Jump trailing white spaces */ | ||
675 | while( zIn < zEnd && zIn[0] < 0xc0 && SyisSpace(zIn[0]) && zIn[0] != '\n' ){ | ||
676 | zIn++; | ||
677 | } | ||
678 | if( sDelim.nByte <= 0 || zIn >= zEnd || zIn[0] != '\n' ){ | ||
679 | /* Invalid syntax */ | ||
680 | return SXERR_CONTINUE; | ||
681 | } | ||
682 | pStream->nLine++; /* Increment line counter */ | ||
683 | zIn++; | ||
684 | /* Isolate the delimited string */ | ||
685 | sStr.zString = (const char *)zIn; | ||
686 | /* Go and found the closing delimiter */ | ||
687 | for(;;){ | ||
688 | /* Synchronize with the next line */ | ||
689 | while( zIn < zEnd && zIn[0] != '\n' ){ | ||
690 | zIn++; | ||
691 | } | ||
692 | if( zIn >= zEnd ){ | ||
693 | /* End of the input reached, break immediately */ | ||
694 | pStream->zText = pStream->zEnd; | ||
695 | break; | ||
696 | } | ||
697 | pStream->nLine++; /* Increment line counter */ | ||
698 | zIn++; | ||
699 | if( (sxu32)(zEnd - zIn) >= sDelim.nByte && SyMemcmp((const void *)sDelim.zString, (const void *)zIn, sDelim.nByte) == 0 ){ | ||
700 | zPtr = &zIn[sDelim.nByte]; | ||
701 | while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){ | ||
702 | zPtr++; | ||
703 | } | ||
704 | if( zPtr >= zEnd ){ | ||
705 | /* End of input */ | ||
706 | pStream->zText = zPtr; | ||
707 | break; | ||
708 | } | ||
709 | if( zPtr[0] == ';' ){ | ||
710 | const unsigned char *zCur = zPtr; | ||
711 | zPtr++; | ||
712 | while( zPtr < zEnd && zPtr[0] < 0xc0 && SyisSpace(zPtr[0]) && zPtr[0] != '\n' ){ | ||
713 | zPtr++; | ||
714 | } | ||
715 | if( zPtr >= zEnd || zPtr[0] == '\n' ){ | ||
716 | /* Closing delimiter found, break immediately */ | ||
717 | pStream->zText = zCur; /* Keep the semi-colon */ | ||
718 | break; | ||
719 | } | ||
720 | }else if( zPtr[0] == '\n' ){ | ||
721 | /* Closing delimiter found, break immediately */ | ||
722 | pStream->zText = zPtr; /* Synchronize with the stream cursor */ | ||
723 | break; | ||
724 | } | ||
725 | /* Synchronize pointers and continue searching */ | ||
726 | zIn = zPtr; | ||
727 | } | ||
728 | } /* For(;;) */ | ||
729 | /* Get the delimited string length */ | ||
730 | sStr.nByte = (sxu32)((const char *)zIn-sStr.zString); | ||
731 | /* Record token type and length */ | ||
732 | pToken->nType = JX9_TK_NOWDOC; | ||
733 | SyStringDupPtr(&pToken->sData, &sStr); | ||
734 | /* Remove trailing white spaces */ | ||
735 | SyStringRightTrim(&pToken->sData); | ||
736 | /* All done */ | ||
737 | return SXRET_OK; | ||
738 | } | ||
739 | /* | ||
740 | * Tokenize a raw jx9 input. | ||
741 | * This is the public tokenizer called by most code generator routines. | ||
742 | */ | ||
743 | JX9_PRIVATE sxi32 jx9Tokenize(const char *zInput,sxu32 nLen,SySet *pOut) | ||
744 | { | ||
745 | SyLex sLexer; | ||
746 | sxi32 rc; | ||
747 | /* Initialize the lexer */ | ||
748 | rc = SyLexInit(&sLexer, &(*pOut),jx9TokenizeInput,0); | ||
749 | if( rc != SXRET_OK ){ | ||
750 | return rc; | ||
751 | } | ||
752 | /* Tokenize input */ | ||
753 | rc = SyLexTokenizeInput(&sLexer, zInput, nLen, 0, 0, 0); | ||
754 | /* Release the lexer */ | ||
755 | SyLexRelease(&sLexer); | ||
756 | /* Tokenization result */ | ||
757 | return rc; | ||
758 | } | ||