/* llex.c revision 76d05dc695b06c4e987bb8078f78032441e1430c */
/*
** $Id: llex.c,v 2.63.1.2 2013/08/30 15:49:41 roberto Exp $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/
6
7
8#ifndef SYSLINUX
9#include <locale.h>
10#else
11#define getlocaledecpoint() '.'
12#endif
13#include <string.h>
14
15#define llex_c
16#define LUA_CORE
17
18#include "lua.h"
19
20#include "lctype.h"
21#include "ldo.h"
22#include "llex.h"
23#include "lobject.h"
24#include "lparser.h"
25#include "lstate.h"
26#include "lstring.h"
27#include "ltable.h"
28#include "lzio.h"
29
30
31
/*
** advance the lexer by one character: read the next character from the
** input stream 'z' into 'current' (the macro's value is that character).
** The argument is parenthesized so that any pointer expression is safe.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
33
34
35
/*
** true when the current character is a line terminator ('\n' or '\r').
** The argument is parenthesized so that any pointer expression is safe;
** note that it is evaluated twice.
*/
#define currIsNewline(ls) \
  ((ls)->current == '\n' || (ls)->current == '\r')
37
38
/*
** ORDER RESERVED
** textual form of every token, indexed by (token - FIRST_RESERVED)
*/
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else",
    "elseif", "end", "false", "for",
    "function", "goto", "if", "in",
    "local", "nil", "not", "or",
    "repeat", "return", "then", "true",
    "until", "while",
    /* multi-character symbols */
    "..", "...", "==", ">=", "<=", "~=", "::",
    /* end of stream */
    "<eof>",
    /* tokens that carry a value */
    "<number>", "<name>", "<string>"
};
48
49
/*
** append the current character to the token buffer, then advance to the
** next input character. The argument is parenthesized (also when handed
** on to 'next') so that any pointer expression is safe.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next((ls)))
51
52
53static l_noret lexerror (LexState *ls, const char *msg, int token);
54
55
56static void save (LexState *ls, int c) {
57  Mbuffer *b = ls->buff;
58  if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
59    size_t newsize;
60    if (luaZ_sizebuffer(b) >= MAX_SIZET/2)
61      lexerror(ls, "lexical element too long", 0);
62    newsize = luaZ_sizebuffer(b) * 2;
63    luaZ_resizebuffer(ls->L, b, newsize);
64  }
65  b->buffer[luaZ_bufflen(b)++] = cast(char, c);
66}
67
68
69void luaX_init (lua_State *L) {
70  int i;
71  for (i=0; i<NUM_RESERVED; i++) {
72    TString *ts = luaS_new(L, luaX_tokens[i]);
73    luaS_fix(ts);  /* reserved words are never collected */
74    ts->tsv.extra = cast_byte(i+1);  /* reserved word */
75  }
76}
77
78
79const char *luaX_token2str (LexState *ls, int token) {
80  if (token < FIRST_RESERVED) {  /* single-byte symbols? */
81    lua_assert(token == cast(unsigned char, token));
82    return (lisprint(token)) ? luaO_pushfstring(ls->L, LUA_QL("%c"), token) :
83                              luaO_pushfstring(ls->L, "char(%d)", token);
84  }
85  else {
86    const char *s = luaX_tokens[token - FIRST_RESERVED];
87    if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
88      return luaO_pushfstring(ls->L, LUA_QS, s);
89    else  /* names, strings, and numerals */
90      return s;
91  }
92}
93
94
95static const char *txtToken (LexState *ls, int token) {
96  switch (token) {
97    case TK_NAME:
98    case TK_STRING:
99    case TK_NUMBER:
100      save(ls, '\0');
101      return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
102    default:
103      return luaX_token2str(ls, token);
104  }
105}
106
107
108static l_noret lexerror (LexState *ls, const char *msg, int token) {
109  char buff[LUA_IDSIZE];
110  luaO_chunkid(buff, getstr(ls->source), LUA_IDSIZE);
111  msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
112  if (token)
113    luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
114  luaD_throw(ls->L, LUA_ERRSYNTAX);
115}
116
117
118l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
119  lexerror(ls, msg, ls->t.token);
120}
121
122
123/*
124** creates a new string and anchors it in function's table so that
125** it will not be collected until the end of the function's compilation
126** (by that time it should be anchored in function's prototype)
127*/
128TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
129  lua_State *L = ls->L;
130  TValue *o;  /* entry for `str' */
131  TString *ts = luaS_newlstr(L, str, l);  /* create new string */
132  setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
133  o = luaH_set(L, ls->fs->h, L->top - 1);
134  if (ttisnil(o)) {  /* not in use yet? (see 'addK') */
135    /* boolean value does not need GC barrier;
136       table has no metatable, so it does not need to invalidate cache */
137    setbvalue(o, 1);  /* t[string] = true */
138    luaC_checkGC(L);
139  }
140  else {  /* string already present */
141    ts = rawtsvalue(keyfromval(o));  /* re-use value previously stored */
142  }
143  L->top--;  /* remove string from stack */
144  return ts;
145}
146
147
148/*
149** increment line number and skips newline sequence (any of
150** \n, \r, \n\r, or \r\n)
151*/
152static void inclinenumber (LexState *ls) {
153  int old = ls->current;
154  lua_assert(currIsNewline(ls));
155  next(ls);  /* skip `\n' or `\r' */
156  if (currIsNewline(ls) && ls->current != old)
157    next(ls);  /* skip `\n\r' or `\r\n' */
158  if (++ls->linenumber >= MAX_INT)
159    luaX_syntaxerror(ls, "chunk has too many lines");
160}
161
162
163void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
164                    int firstchar) {
165  ls->decpoint = '.';
166  ls->L = L;
167  ls->current = firstchar;
168  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
169  ls->z = z;
170  ls->fs = NULL;
171  ls->linenumber = 1;
172  ls->lastline = 1;
173  ls->source = source;
174  ls->envn = luaS_new(L, LUA_ENV);  /* create env name */
175  luaS_fix(ls->envn);  /* never collect this name */
176  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
177}
178
179
180
181/*
182** =======================================================
183** LEXICAL ANALYZER
184** =======================================================
185*/
186
187
188
189static int check_next (LexState *ls, const char *set) {
190  if (ls->current == '\0' || !strchr(set, ls->current))
191    return 0;
192  save_and_next(ls);
193  return 1;
194}
195
196
197/*
198** change all characters 'from' in buffer to 'to'
199*/
200static void buffreplace (LexState *ls, char from, char to) {
201  size_t n = luaZ_bufflen(ls->buff);
202  char *p = luaZ_buffer(ls->buff);
203  while (n--)
204    if (p[n] == from) p[n] = to;
205}
206
207
/* decimal-point character of the current locale (unless already defined) */
#ifndef getlocaledecpoint
#define getlocaledecpoint() (localeconv()->decimal_point[0])
#endif
211
212
/*
** convert the contents of buffer 'b' to a number stored through 'e';
** the last buffer position is not counted in the length passed on
*/
#define buff2d(b,e) \
  luaO_str2d(luaZ_buffer(b), luaZ_bufflen(b) - 1, e)
214
215/*
216** in case of format error, try to change decimal point separator to
217** the one defined in the current locale and check again
218*/
219static void trydecpoint (LexState *ls, SemInfo *seminfo) {
220  char old = ls->decpoint;
221  ls->decpoint = getlocaledecpoint();
222  buffreplace(ls, old, ls->decpoint);  /* try new decimal separator */
223  if (!buff2d(ls->buff, &seminfo->r)) {
224    /* format error with correct decimal point: no more options */
225    buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
226    lexerror(ls, "malformed number", TK_NUMBER);
227  }
228}
229
230
231/* LUA_NUMBER */
232/*
233** this function is quite liberal in what it accepts, as 'luaO_str2d'
234** will reject ill-formed numerals.
235*/
236static void read_numeral (LexState *ls, SemInfo *seminfo) {
237  const char *expo = "Ee";
238  int first = ls->current;
239  lua_assert(lisdigit(ls->current));
240  save_and_next(ls);
241  if (first == '0' && check_next(ls, "Xx"))  /* hexadecimal? */
242    expo = "Pp";
243  for (;;) {
244    if (check_next(ls, expo))  /* exponent part? */
245      check_next(ls, "+-");  /* optional exponent sign */
246    if (lisxdigit(ls->current) || ls->current == '.')
247      save_and_next(ls);
248    else  break;
249  }
250  save(ls, '\0');
251  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
252  if (!buff2d(ls->buff, &seminfo->r))  /* format error? */
253    trydecpoint(ls, seminfo); /* try to update decimal point separator */
254}
255
256
257/*
258** skip a sequence '[=*[' or ']=*]' and return its number of '='s or
259** -1 if sequence is malformed
260*/
261static int skip_sep (LexState *ls) {
262  int count = 0;
263  int s = ls->current;
264  lua_assert(s == '[' || s == ']');
265  save_and_next(ls);
266  while (ls->current == '=') {
267    save_and_next(ls);
268    count++;
269  }
270  return (ls->current == s) ? count : (-count) - 1;
271}
272
273
274static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
275  save_and_next(ls);  /* skip 2nd `[' */
276  if (currIsNewline(ls))  /* string starts with a newline? */
277    inclinenumber(ls);  /* skip it */
278  for (;;) {
279    switch (ls->current) {
280      case EOZ:
281        lexerror(ls, (seminfo) ? "unfinished long string" :
282                                 "unfinished long comment", TK_EOS);
283        break;  /* to avoid warnings */
284      case ']': {
285        if (skip_sep(ls) == sep) {
286          save_and_next(ls);  /* skip 2nd `]' */
287          goto endloop;
288        }
289        break;
290      }
291      case '\n': case '\r': {
292        save(ls, '\n');
293        inclinenumber(ls);
294        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
295        break;
296      }
297      default: {
298        if (seminfo) save_and_next(ls);
299        else next(ls);
300      }
301    }
302  } endloop:
303  if (seminfo)
304    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
305                                     luaZ_bufflen(ls->buff) - 2*(2 + sep));
306}
307
308
309static void escerror (LexState *ls, int *c, int n, const char *msg) {
310  int i;
311  luaZ_resetbuffer(ls->buff);  /* prepare error message */
312  save(ls, '\\');
313  for (i = 0; i < n && c[i] != EOZ; i++)
314    save(ls, c[i]);
315  lexerror(ls, msg, TK_STRING);
316}
317
318
319static int readhexaesc (LexState *ls) {
320  int c[3], i;  /* keep input for error message */
321  int r = 0;  /* result accumulator */
322  c[0] = 'x';  /* for error message */
323  for (i = 1; i < 3; i++) {  /* read two hexadecimal digits */
324    c[i] = next(ls);
325    if (!lisxdigit(c[i]))
326      escerror(ls, c, i + 1, "hexadecimal digit expected");
327    r = (r << 4) + luaO_hexavalue(c[i]);
328  }
329  return r;
330}
331
332
333static int readdecesc (LexState *ls) {
334  int c[3], i;
335  int r = 0;  /* result accumulator */
336  for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
337    c[i] = ls->current;
338    r = 10*r + c[i] - '0';
339    next(ls);
340  }
341  if (r > UCHAR_MAX)
342    escerror(ls, c, i, "decimal escape too large");
343  return r;
344}
345
346
347static void read_string (LexState *ls, int del, SemInfo *seminfo) {
348  save_and_next(ls);  /* keep delimiter (for error messages) */
349  while (ls->current != del) {
350    switch (ls->current) {
351      case EOZ:
352        lexerror(ls, "unfinished string", TK_EOS);
353        break;  /* to avoid warnings */
354      case '\n':
355      case '\r':
356        lexerror(ls, "unfinished string", TK_STRING);
357        break;  /* to avoid warnings */
358      case '\\': {  /* escape sequences */
359        int c;  /* final character to be saved */
360        next(ls);  /* do not save the `\' */
361        switch (ls->current) {
362          case 'a': c = '\a'; goto read_save;
363          case 'b': c = '\b'; goto read_save;
364          case 'f': c = '\f'; goto read_save;
365          case 'n': c = '\n'; goto read_save;
366          case 'r': c = '\r'; goto read_save;
367          case 't': c = '\t'; goto read_save;
368          case 'v': c = '\v'; goto read_save;
369          case 'x': c = readhexaesc(ls); goto read_save;
370          case '\n': case '\r':
371            inclinenumber(ls); c = '\n'; goto only_save;
372          case '\\': case '\"': case '\'':
373            c = ls->current; goto read_save;
374          case EOZ: goto no_save;  /* will raise an error next loop */
375          case 'z': {  /* zap following span of spaces */
376            next(ls);  /* skip the 'z' */
377            while (lisspace(ls->current)) {
378              if (currIsNewline(ls)) inclinenumber(ls);
379              else next(ls);
380            }
381            goto no_save;
382          }
383          default: {
384            if (!lisdigit(ls->current))
385              escerror(ls, &ls->current, 1, "invalid escape sequence");
386            /* digital escape \ddd */
387            c = readdecesc(ls);
388            goto only_save;
389          }
390        }
391       read_save: next(ls);  /* read next character */
392       only_save: save(ls, c);  /* save 'c' */
393       no_save: break;
394      }
395      default:
396        save_and_next(ls);
397    }
398  }
399  save_and_next(ls);  /* skip delimiter */
400  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
401                                   luaZ_bufflen(ls->buff) - 2);
402}
403
404
405static int llex (LexState *ls, SemInfo *seminfo) {
406  luaZ_resetbuffer(ls->buff);
407  for (;;) {
408    switch (ls->current) {
409      case '\n': case '\r': {  /* line breaks */
410        inclinenumber(ls);
411        break;
412      }
413      case ' ': case '\f': case '\t': case '\v': {  /* spaces */
414        next(ls);
415        break;
416      }
417      case '-': {  /* '-' or '--' (comment) */
418        next(ls);
419        if (ls->current != '-') return '-';
420        /* else is a comment */
421        next(ls);
422        if (ls->current == '[') {  /* long comment? */
423          int sep = skip_sep(ls);
424          luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
425          if (sep >= 0) {
426            read_long_string(ls, NULL, sep);  /* skip long comment */
427            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
428            break;
429          }
430        }
431        /* else short comment */
432        while (!currIsNewline(ls) && ls->current != EOZ)
433          next(ls);  /* skip until end of line (or end of file) */
434        break;
435      }
436      case '[': {  /* long string or simply '[' */
437        int sep = skip_sep(ls);
438        if (sep >= 0) {
439          read_long_string(ls, seminfo, sep);
440          return TK_STRING;
441        }
442        else if (sep == -1) return '[';
443        else lexerror(ls, "invalid long string delimiter", TK_STRING);
444      }
445      case '=': {
446        next(ls);
447        if (ls->current != '=') return '=';
448        else { next(ls); return TK_EQ; }
449      }
450      case '<': {
451        next(ls);
452        if (ls->current != '=') return '<';
453        else { next(ls); return TK_LE; }
454      }
455      case '>': {
456        next(ls);
457        if (ls->current != '=') return '>';
458        else { next(ls); return TK_GE; }
459      }
460      case '~': {
461        next(ls);
462        if (ls->current != '=') return '~';
463        else { next(ls); return TK_NE; }
464      }
465      case ':': {
466        next(ls);
467        if (ls->current != ':') return ':';
468        else { next(ls); return TK_DBCOLON; }
469      }
470      case '"': case '\'': {  /* short literal strings */
471        read_string(ls, ls->current, seminfo);
472        return TK_STRING;
473      }
474      case '.': {  /* '.', '..', '...', or number */
475        save_and_next(ls);
476        if (check_next(ls, ".")) {
477          if (check_next(ls, "."))
478            return TK_DOTS;   /* '...' */
479          else return TK_CONCAT;   /* '..' */
480        }
481        else if (!lisdigit(ls->current)) return '.';
482        /* else go through */
483      }
484      case '0': case '1': case '2': case '3': case '4':
485      case '5': case '6': case '7': case '8': case '9': {
486        read_numeral(ls, seminfo);
487        return TK_NUMBER;
488      }
489      case EOZ: {
490        return TK_EOS;
491      }
492      default: {
493        if (lislalpha(ls->current)) {  /* identifier or reserved word? */
494          TString *ts;
495          do {
496            save_and_next(ls);
497          } while (lislalnum(ls->current));
498          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
499                                  luaZ_bufflen(ls->buff));
500          seminfo->ts = ts;
501          if (isreserved(ts))  /* reserved word? */
502            return ts->tsv.extra - 1 + FIRST_RESERVED;
503          else {
504            return TK_NAME;
505          }
506        }
507        else {  /* single-char tokens (+ - / ...) */
508          int c = ls->current;
509          next(ls);
510          return c;
511        }
512      }
513    }
514  }
515}
516
517
518void luaX_next (LexState *ls) {
519  ls->lastline = ls->linenumber;
520  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
521    ls->t = ls->lookahead;  /* use this one */
522    ls->lookahead.token = TK_EOS;  /* and discharge it */
523  }
524  else
525    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
526}
527
528
529int luaX_lookahead (LexState *ls) {
530  lua_assert(ls->lookahead.token == TK_EOS);
531  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
532  return ls->lookahead.token;
533}
534
535