1/*
2 * GAS-compatible re2c lexer
3 *
4 *  Copyright (C) 2005-2007  Peter Johnson
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the author nor the names of other contributors
15 *    may be used to endorse or promote products derived from this
16 *    software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND OTHER CONTRIBUTORS ``AS IS''
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR OTHER CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30#include <util.h>
31
32#include <libyasm.h>
33
34#include "modules/parsers/gas/gas-parser.h"
35
36
/* Number of bytes requested from the preprocessor on each buffer refill. */
#define BSIZE           8192

/* re2c scanner interface.  All of these expect a local `cursor' and a local
 * `s' (pointer to the scanner state) to be in scope; YYFILL additionally
 * expects `parser_gas'.  The argument of YYFILL is not used: fill() always
 * requests BSIZE bytes.
 */
#define YYCURSOR        cursor
#define YYLIMIT         (s->lim)
#define YYMARKER        (s->ptr)
#define YYFILL(n)       {cursor = fill(parser_gas, cursor);}

/* Leave the lexer with token code i: store the cursor back into the scanner
 * state and record the first character of the token in parser_gas->tokch.
 */
#define RETURN(i)       do { \
                            s->cur = cursor; \
                            parser_gas->tokch = s->tok[0]; \
                            return i; \
                        } while (0)

/* Start a new token at the current cursor position. */
#define SCANINIT()      {s->tok = cursor;}

/* Text of the current token (not NUL-terminated) and its length in bytes. */
#define TOK             ((char *)s->tok)
#define TOKLEN          (size_t)(cursor-s->tok)
51
52/* Bridge function to convert byte-oriented parser with line-oriented
53 * preprocessor.
54 */
55static size_t
56preproc_input(yasm_parser_gas *parser_gas, /*@out@*/ YYCTYPE *buf,
57              size_t max_size)
58{
59    size_t tot=0;
60    while (max_size > 0) {
61        size_t n;
62
63        if (!parser_gas->line) {
64            parser_gas->line = yasm_preproc_get_line(parser_gas->preproc);
65            if (!parser_gas->line)
66                return tot; /* EOF */
67            parser_gas->linepos = parser_gas->line;
68            parser_gas->lineleft = strlen(parser_gas->line) + 1;
69            parser_gas->line[parser_gas->lineleft-1] = '\n';
70        }
71
72        n = parser_gas->lineleft<max_size ? parser_gas->lineleft : max_size;
73        strncpy((char *)buf+tot, parser_gas->linepos, n);
74
75        if (n == parser_gas->lineleft) {
76            yasm_xfree(parser_gas->line);
77            parser_gas->line = NULL;
78        } else {
79            parser_gas->lineleft -= n;
80            parser_gas->linepos += n;
81        }
82
83        tot += n;
84        max_size -= n;
85    }
86    return tot;
87}
#if 0
/* Disabled: input callback that forwards to yasm_preproc_input(), treating
 * the opaque pointer d as the preprocessor.
 */
static size_t
fill_input(void *d, unsigned char *buf, size_t max)
{
    yasm_preproc *preproc = (yasm_preproc *)d;
    char *dest = (char *)buf;

    return yasm_preproc_input(preproc, dest, max);
}
#endif
95static YYCTYPE *
96fill(yasm_parser_gas *parser_gas, YYCTYPE *cursor)
97{
98    yasm_scanner *s = &parser_gas->s;
99    int first = 0;
100    if(!s->eof){
101        size_t cnt = s->tok - s->bot;
102        if(cnt){
103            memmove(s->bot, s->tok, (size_t)(s->lim - s->tok));
104            s->tok = s->bot;
105            s->ptr -= cnt;
106            cursor -= cnt;
107            s->lim -= cnt;
108        }
109        if (!s->bot)
110            first = 1;
111        if((s->top - s->lim) < BSIZE){
112            YYCTYPE *buf = yasm_xmalloc((size_t)(s->lim - s->bot) + BSIZE);
113            memcpy(buf, s->tok, (size_t)(s->lim - s->tok));
114            s->tok = buf;
115            s->ptr = &buf[s->ptr - s->bot];
116            cursor = &buf[cursor - s->bot];
117            s->lim = &buf[s->lim - s->bot];
118            s->top = &s->lim[BSIZE];
119            if (s->bot)
120                yasm_xfree(s->bot);
121            s->bot = buf;
122        }
123        if((cnt = preproc_input(parser_gas, s->lim, BSIZE)) == 0) {
124            s->eof = &s->lim[cnt]; *s->eof++ = '\n';
125        }
126        s->lim += cnt;
127        if (first && parser_gas->save_input) {
128            int i;
129            YYCTYPE *saveline;
130            parser_gas->save_last ^= 1;
131            saveline = parser_gas->save_line[parser_gas->save_last];
132            /* save next line into cur_line */
133            for (i=0; i<79 && &s->tok[i] < s->lim && s->tok[i] != '\n'; i++)
134                saveline[i] = s->tok[i];
135            saveline[i] = '\0';
136        }
137    }
138    return cursor;
139}
140
141static YYCTYPE *
142save_line(yasm_parser_gas *parser_gas, YYCTYPE *cursor)
143{
144    yasm_scanner *s = &parser_gas->s;
145    int i = 0;
146    YYCTYPE *saveline;
147
148    parser_gas->save_last ^= 1;
149    saveline = parser_gas->save_line[parser_gas->save_last];
150
151    /* save next line into cur_line */
152    if ((YYLIMIT - YYCURSOR) < 80)
153        YYFILL(80);
154    for (i=0; i<79 && &cursor[i] < s->lim && cursor[i] != '\n'; i++)
155        saveline[i] = cursor[i];
156    saveline[i] = '\0';
157    return cursor;
158}
159
160/* starting size of string buffer */
161#define STRBUF_ALLOC_SIZE       128
162
163/* string buffer used when parsing strings/character constants */
164static YYCTYPE *strbuf = NULL;
165
166/* length of strbuf (including terminating NULL character) */
167static size_t strbuf_size = 0;
168
169static void
170strbuf_append(size_t count, YYCTYPE *cursor, yasm_scanner *s, int ch)
171{
172    if (count >= strbuf_size) {
173        strbuf = yasm_xrealloc(strbuf, strbuf_size + STRBUF_ALLOC_SIZE);
174        strbuf_size += STRBUF_ALLOC_SIZE;
175    }
176    strbuf[count] = ch;
177}
178
179/*!re2c
180  any = [\000-\377];
181  digit = [0-9];
182  iletter = [a-zA-Z];
183  bindigit = [01];
184  octdigit = [0-7];
185  hexdigit = [0-9a-fA-F];
186  ws = [ \t\r];
187  dquot = ["];
188*/
189
190
191int
192gas_parser_lex(YYSTYPE *lvalp, yasm_parser_gas *parser_gas)
193{
194    yasm_scanner *s = &parser_gas->s;
195    YYCTYPE *cursor = s->cur;
196    size_t count;
197    YYCTYPE savech;
198
199    /* Handle one token of lookahead */
200    if (parser_gas->peek_token != NONE) {
201        int tok = parser_gas->peek_token;
202        *lvalp = parser_gas->peek_tokval;  /* structure copy */
203        parser_gas->tokch = parser_gas->peek_tokch;
204        parser_gas->peek_token = NONE;
205        return tok;
206    }
207
208    /* Catch EOF */
209    if (s->eof && cursor == s->eof)
210        return 0;
211
212    /* Jump to proper "exclusive" states */
213    switch (parser_gas->state) {
214        case COMMENT:
215            goto comment;
216        case SECTION_DIRECTIVE:
217            goto section_directive;
218        case NASM_FILENAME:
219            goto nasm_filename;
220        default:
221            break;
222    }
223
224scan:
225    SCANINIT();
226
227    /*!re2c
228        /* standard decimal integer */
229        ([1-9] digit*) | "0" {
230            savech = s->tok[TOKLEN];
231            s->tok[TOKLEN] = '\0';
232            lvalp->intn = yasm_intnum_create_dec(TOK);
233            s->tok[TOKLEN] = savech;
234            RETURN(INTNUM);
235        }
236
237        /* 0b10010011 - binary number */
238        '0b' bindigit+ {
239            savech = s->tok[TOKLEN];
240            s->tok[TOKLEN] = '\0';
241            lvalp->intn = yasm_intnum_create_bin(TOK+2);
242            s->tok[TOKLEN] = savech;
243            RETURN(INTNUM);
244        }
245
246        /* 0777 - octal number */
247        "0" octdigit+ {
248            savech = s->tok[TOKLEN];
249            s->tok[TOKLEN] = '\0';
250            lvalp->intn = yasm_intnum_create_oct(TOK);
251            s->tok[TOKLEN] = savech;
252            RETURN(INTNUM);
253        }
254
255        /* 0xAA - hexidecimal number */
256        '0x' hexdigit+ {
257            savech = s->tok[TOKLEN];
258            s->tok[TOKLEN] = '\0';
259            /* skip 0 and x */
260            lvalp->intn = yasm_intnum_create_hex(TOK+2);
261            s->tok[TOKLEN] = savech;
262            RETURN(INTNUM);
263        }
264
265        /* floating point value */
266        [-+]? digit* "." digit+ ('e' [-+]? digit+)? {
267            savech = s->tok[TOKLEN];
268            s->tok[TOKLEN] = '\0';
269            lvalp->flt = yasm_floatnum_create(TOK);
270            s->tok[TOKLEN] = savech;
271            RETURN(FLTNUM);
272        }
273        [-+]? digit+ "." digit* ('e' [-+]? digit+)? {
274            savech = s->tok[TOKLEN];
275            s->tok[TOKLEN] = '\0';
276            lvalp->flt = yasm_floatnum_create(TOK);
277            s->tok[TOKLEN] = savech;
278            RETURN(FLTNUM);
279        }
280        "0" [DdEeFfTt] [-+]? digit* ("." digit*)? ('e' [-+]? digit+)? {
281            savech = s->tok[TOKLEN];
282            s->tok[TOKLEN] = '\0';
283            lvalp->flt = yasm_floatnum_create(TOK+2);
284            s->tok[TOKLEN] = savech;
285            RETURN(FLTNUM);
286        }
287
288        /* character constant values */
289        ['] {
290            goto charconst;
291        }
292
293        /* string constant values */
294        dquot {
295            goto stringconst;
296        }
297
298        /* operators */
299        "<<"                    { RETURN(LEFT_OP); }
300        ">>"                    { RETURN(RIGHT_OP); }
301        "<"                     { RETURN(LEFT_OP); }
302        ">"                     { RETURN(RIGHT_OP); }
303        [-+|^!*&/~$():@=,]      { RETURN(s->tok[0]); }
304        ";"     {
305            parser_gas->state = INITIAL;
306            RETURN(s->tok[0]);
307        }
308
309        /* identifier */
310        [a-zA-Z_.][a-zA-Z0-9_$.]* {
311            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
312            lvalp->str.len = TOKLEN;
313            RETURN(ID);
314        }
315
316        /* identifier with @ */
317        [a-zA-Z_.]([a-zA-Z0-9_$.]*[@][a-zA-Z0-9_$.]*)+ {
318            /* if @ not part of ID, move the scanner cursor to the first @ */
319            if (!((yasm_objfmt_base *)p_object->objfmt)->module->id_at_ok)
320                cursor = (unsigned char *)strchr(TOK, '@');
321            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
322            lvalp->str.len = TOKLEN;
323            RETURN(ID);
324        }
325
326        /* register or segment register */
327        [%][a-zA-Z0-9]+ {
328            savech = s->tok[TOKLEN];
329            s->tok[TOKLEN] = '\0';
330            if (parser_gas->is_nasm_preproc && strcmp(TOK+1, "line") == 0) {
331                s->tok[TOKLEN] = savech;
332                RETURN(NASM_LINE_MARKER);
333            }
334
335            switch (yasm_arch_parse_check_regtmod
336                    (p_object->arch, TOK+1, TOKLEN-1, &lvalp->arch_data)) {
337                case YASM_ARCH_REG:
338                    s->tok[TOKLEN] = savech;
339                    RETURN(REG);
340                case YASM_ARCH_REGGROUP:
341                    s->tok[TOKLEN] = savech;
342                    RETURN(REGGROUP);
343                case YASM_ARCH_SEGREG:
344                    s->tok[TOKLEN] = savech;
345                    RETURN(SEGREG);
346                default:
347                    break;
348            }
349            yasm_error_set(YASM_ERROR_GENERAL,
350                           N_("Unrecognized register name `%s'"), s->tok);
351            s->tok[TOKLEN] = savech;
352            lvalp->arch_data = 0;
353            RETURN(REG);
354        }
355
356        /* local label */
357        [0-9] ':' {
358            /* increment label index */
359            parser_gas->local[s->tok[0]-'0']++;
360            /* build local label name */
361            lvalp->str.contents = yasm_xmalloc(30);
362            lvalp->str.len =
363                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
364                        parser_gas->local[s->tok[0]-'0']);
365            RETURN(LABEL);
366        }
367
368        /* local label forward reference */
369        [0-9] 'f' {
370            /* build local label name */
371            lvalp->str.contents = yasm_xmalloc(30);
372            lvalp->str.len =
373                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
374                        parser_gas->local[s->tok[0]-'0']+1);
375            RETURN(ID);
376        }
377
378        /* local label backward reference */
379        [0-9] 'b' {
380            /* build local label name */
381            lvalp->str.contents = yasm_xmalloc(30);
382            lvalp->str.len =
383                sprintf(lvalp->str.contents, "L%c\001%lu", s->tok[0],
384                        parser_gas->local[s->tok[0]-'0']);
385            RETURN(ID);
386        }
387
388        "/*"                    { parser_gas->state = COMMENT; goto comment; }
389        "#"                     {
390            if (parser_gas->is_cpp_preproc)
391            {
392                RETURN(CPP_LINE_MARKER);
393            } else
394                goto line_comment;
395        }
396        "//"                    { goto line_comment; }
397
398        ws+                     { goto scan; }
399
400        "\n"                    {
401            if (parser_gas->save_input)
402                cursor = save_line(parser_gas, cursor);
403            parser_gas->state = INITIAL;
404            RETURN(s->tok[0]);
405        }
406
407        any {
408            yasm_warn_set(YASM_WARN_UNREC_CHAR,
409                          N_("ignoring unrecognized character `%s'"),
410                          yasm__conv_unprint(s->tok[0]));
411            goto scan;
412        }
413    */
414
415    /* C-style comment; nesting not supported */
416comment:
417    SCANINIT();
418
419    /*!re2c
420        /* End of comment */
421        "*/"    { parser_gas->state = INITIAL; goto scan; }
422
423        "\n"                    {
424            if (parser_gas->save_input)
425                cursor = save_line(parser_gas, cursor);
426            RETURN(s->tok[0]);
427        }
428
429        any     {
430            if (cursor == s->eof)
431                return 0;
432            goto comment;
433        }
434    */
435
436    /* Single line comment. */
437line_comment:
438    /*!re2c
439        (any \ [\n])*   { goto scan; }
440    */
441
442    /* .section directive (the section name portion thereof) */
443section_directive:
444    SCANINIT();
445
446    /*!re2c
447        [a-zA-Z0-9_$.-]+ {
448            lvalp->str.contents = yasm__xstrndup(TOK, TOKLEN);
449            lvalp->str.len = TOKLEN;
450            parser_gas->state = INITIAL;
451            RETURN(ID);
452        }
453
454        dquot                   { goto stringconst; }
455
456        ws+                     { goto section_directive; }
457
458        ","                     {
459            parser_gas->state = INITIAL;
460            RETURN(s->tok[0]);
461        }
462
463        "\n"                    {
464            if (parser_gas->save_input)
465                cursor = save_line(parser_gas, cursor);
466            parser_gas->state = INITIAL;
467            RETURN(s->tok[0]);
468        }
469
470        any {
471            yasm_warn_set(YASM_WARN_UNREC_CHAR,
472                          N_("ignoring unrecognized character `%s'"),
473                          yasm__conv_unprint(s->tok[0]));
474            goto section_directive;
475        }
476    */
477
478    /* filename portion of nasm preproc %line */
479nasm_filename:
480    strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE);
481    strbuf_size = STRBUF_ALLOC_SIZE;
482    count = 0;
483
484nasm_filename_scan:
485    SCANINIT();
486
487    /*!re2c
488        "\n" {
489            strbuf_append(count++, cursor, s, '\0');
490            lvalp->str.contents = (char *)strbuf;
491            lvalp->str.len = count;
492            parser_gas->state = INITIAL;
493            RETURN(STRING);
494        }
495
496        ws+ { goto nasm_filename_scan; }
497
498        any {
499            if (cursor == s->eof) {
500                strbuf_append(count++, cursor, s, '\0');
501                lvalp->str.contents = (char *)strbuf;
502                lvalp->str.len = count;
503                parser_gas->state = INITIAL;
504                RETURN(STRING);
505            }
506            strbuf_append(count++, cursor, s, s->tok[0]);
507            goto nasm_filename_scan;
508        }
509    */
510
511    /* character constant values */
512charconst:
513    /*TODO*/
514
515    /* string constant values */
516stringconst:
517    strbuf = yasm_xmalloc(STRBUF_ALLOC_SIZE);
518    strbuf_size = STRBUF_ALLOC_SIZE;
519    count = 0;
520
521stringconst_scan:
522    SCANINIT();
523
524    /*!re2c
525        /* Handle escaped character by copying both and continuing. */
526        "\\".   {
527            if (cursor == s->eof) {
528                yasm_error_set(YASM_ERROR_SYNTAX,
529                               N_("unexpected end of file in string"));
530                lvalp->str.contents = (char *)strbuf;
531                lvalp->str.len = count;
532                RETURN(STRING);
533            }
534            strbuf_append(count++, cursor, s, '\\');
535            strbuf_append(count++, cursor, s, s->tok[1]);
536            goto stringconst_scan;
537        }
538
539        dquot   {
540            strbuf_append(count, cursor, s, '\0');
541            yasm_unescape_cstring(strbuf, &count);
542            lvalp->str.contents = (char *)strbuf;
543            lvalp->str.len = count;
544            RETURN(STRING);
545        }
546
547        any     {
548            if (cursor == s->eof) {
549                yasm_error_set(YASM_ERROR_SYNTAX,
550                               N_("unexpected end of file in string"));
551                lvalp->str.contents = (char *)strbuf;
552                lvalp->str.len = count;
553                RETURN(STRING);
554            }
555            strbuf_append(count++, cursor, s, s->tok[0]);
556            goto stringconst_scan;
557        }
558    */
559}
560