1
2/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
8#include "Python.h"
9#include "pgenheaders.h"
10#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
16
17#ifdef Py_DEBUG
18extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
20#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
27static void s_reset(stack *);
28
29static void
30s_reset(stack *s)
31{
32    s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
37static int
38s_push(register stack *s, dfa *d, node *parent)
39{
40    register stackentry *top;
41    if (s->s_top == s->s_base) {
42        fprintf(stderr, "s_push: parser stack overflow\n");
43        return E_NOMEM;
44    }
45    top = --s->s_top;
46    top->s_dfa = d;
47    top->s_parent = parent;
48    top->s_state = 0;
49    return 0;
50}
51
52#ifdef Py_DEBUG
53
54static void
55s_pop(register stack *s)
56{
57    if (s_empty(s))
58        Py_FatalError("s_pop: parser stack underflow -- FATAL");
59    s->s_top++;
60}
61
62#else /* !Py_DEBUG */
63
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
72PyParser_New(grammar *g, int start)
73{
74    parser_state *ps;
75
76    if (!g->g_accel)
77        PyGrammar_AddAccelerators(g);
78    ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79    if (ps == NULL)
80        return NULL;
81    ps->p_grammar = g;
82#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83    ps->p_flags = 0;
84#endif
85    ps->p_tree = PyNode_New(start);
86    if (ps->p_tree == NULL) {
87        PyMem_FREE(ps);
88        return NULL;
89    }
90    s_reset(&ps->p_stack);
91    (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92    return ps;
93}
94
95void
96PyParser_Delete(parser_state *ps)
97{
98    /* NB If you want to save the parse tree,
99       you must set p_tree to NULL before calling delparser! */
100    PyNode_Free(ps->p_tree);
101    PyMem_FREE(ps);
102}
103
104
105/* PARSER STACK OPERATIONS */
106
107static int
108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109{
110    int err;
111    assert(!s_empty(s));
112    err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113    if (err)
114        return err;
115    s->s_top->s_state = newstate;
116    return 0;
117}
118
119static int
120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121{
122    int err;
123    register node *n;
124    n = s->s_top->s_parent;
125    assert(!s_empty(s));
126    err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127    if (err)
128        return err;
129    s->s_top->s_state = newstate;
130    return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
136static int
137classify(parser_state *ps, int type, char *str)
138{
139    grammar *g = ps->p_grammar;
140    register int n = g->g_ll.ll_nlabels;
141
142    if (type == NAME) {
143        register char *s = str;
144        register label *l = g->g_ll.ll_label;
145        register int i;
146        for (i = n; i > 0; i--, l++) {
147            if (l->lb_type != NAME || l->lb_str == NULL ||
148                l->lb_str[0] != s[0] ||
149                strcmp(l->lb_str, s) != 0)
150                continue;
151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152            if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153                s[0] == 'p' && strcmp(s, "print") == 0) {
154                break; /* no longer a keyword */
155            }
156#endif
157            D(printf("It's a keyword\n"));
158            return n - i;
159        }
160    }
161
162    {
163        register label *l = g->g_ll.ll_label;
164        register int i;
165        for (i = n; i > 0; i--, l++) {
166            if (l->lb_type == type && l->lb_str == NULL) {
167                D(printf("It's a token we know\n"));
168                return n - i;
169            }
170        }
171    }
172
173    D(printf("Illegal token\n"));
174    return -1;
175}
176
177#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
178static void
179future_hack(parser_state *ps)
180{
181    node *n = ps->p_stack.s_top->s_parent;
182    node *ch, *cch;
183    int i;
184
185    /* from __future__ import ..., must have at least 4 children */
186    n = CHILD(n, 0);
187    if (NCH(n) < 4)
188        return;
189    ch = CHILD(n, 0);
190    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
191        return;
192    ch = CHILD(n, 1);
193    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
194        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
195        return;
196    ch = CHILD(n, 3);
197    /* ch can be a star, a parenthesis or import_as_names */
198    if (TYPE(ch) == STAR)
199        return;
200    if (TYPE(ch) == LPAR)
201        ch = CHILD(n, 4);
202
203    for (i = 0; i < NCH(ch); i += 2) {
204        cch = CHILD(ch, i);
205        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206            char *str_ch = STR(CHILD(cch, 0));
207            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
210                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
211            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
212                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
213            }
214        }
215    }
216}
217#endif /* future keyword */
218
219int
220PyParser_AddToken(register parser_state *ps, register int type, char *str,
221                  int lineno, int col_offset, int *expected_ret)
222{
223    register int ilabel;
224    int err;
225
226    D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
227
228    /* Find out which label this token is */
229    ilabel = classify(ps, type, str);
230    if (ilabel < 0)
231        return E_SYNTAX;
232
233    /* Loop until the token is shifted or an error occurred */
234    for (;;) {
235        /* Fetch the current dfa and state */
236        register dfa *d = ps->p_stack.s_top->s_dfa;
237        register state *s = &d->d_state[ps->p_stack.s_top->s_state];
238
239        D(printf(" DFA '%s', state %d:",
240            d->d_name, ps->p_stack.s_top->s_state));
241
242        /* Check accelerator */
243        if (s->s_lower <= ilabel && ilabel < s->s_upper) {
244            register int x = s->s_accel[ilabel - s->s_lower];
245            if (x != -1) {
246                if (x & (1<<7)) {
247                    /* Push non-terminal */
248                    int nt = (x >> 8) + NT_OFFSET;
249                    int arrow = x & ((1<<7)-1);
250                    dfa *d1 = PyGrammar_FindDFA(
251                        ps->p_grammar, nt);
252                    if ((err = push(&ps->p_stack, nt, d1,
253                        arrow, lineno, col_offset)) > 0) {
254                        D(printf(" MemError: push\n"));
255                        return err;
256                    }
257                    D(printf(" Push ...\n"));
258                    continue;
259                }
260
261                /* Shift the token */
262                if ((err = shift(&ps->p_stack, type, str,
263                                x, lineno, col_offset)) > 0) {
264                    D(printf(" MemError: shift.\n"));
265                    return err;
266                }
267                D(printf(" Shift.\n"));
268                /* Pop while we are in an accept-only state */
269                while (s = &d->d_state
270                                [ps->p_stack.s_top->s_state],
271                    s->s_accept && s->s_narcs == 1) {
272                    D(printf("  DFA '%s', state %d: "
273                             "Direct pop.\n",
274                             d->d_name,
275                             ps->p_stack.s_top->s_state));
276#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
277                    if (d->d_name[0] == 'i' &&
278                        strcmp(d->d_name,
279                           "import_stmt") == 0)
280                        future_hack(ps);
281#endif
282                    s_pop(&ps->p_stack);
283                    if (s_empty(&ps->p_stack)) {
284                        D(printf("  ACCEPT.\n"));
285                        return E_DONE;
286                    }
287                    d = ps->p_stack.s_top->s_dfa;
288                }
289                return E_OK;
290            }
291        }
292
293        if (s->s_accept) {
294#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
295            if (d->d_name[0] == 'i' &&
296                strcmp(d->d_name, "import_stmt") == 0)
297                future_hack(ps);
298#endif
299            /* Pop this dfa and try again */
300            s_pop(&ps->p_stack);
301            D(printf(" Pop ...\n"));
302            if (s_empty(&ps->p_stack)) {
303                D(printf(" Error: bottom of stack.\n"));
304                return E_SYNTAX;
305            }
306            continue;
307        }
308
309        /* Stuck, report syntax error */
310        D(printf(" Error.\n"));
311        if (expected_ret) {
312            if (s->s_lower == s->s_upper - 1) {
313                /* Only one possible expected token */
314                *expected_ret = ps->p_grammar->
315                    g_ll.ll_label[s->s_lower].lb_type;
316            }
317            else
318                *expected_ret = -1;
319        }
320        return E_SYNTAX;
321    }
322}
323
324
325#ifdef Py_DEBUG
326
327/* DEBUG OUTPUT */
328
329void
330dumptree(grammar *g, node *n)
331{
332    int i;
333
334    if (n == NULL)
335        printf("NIL");
336    else {
337        label l;
338        l.lb_type = TYPE(n);
339        l.lb_str = STR(n);
340        printf("%s", PyGrammar_LabelRepr(&l));
341        if (ISNONTERMINAL(TYPE(n))) {
342            printf("(");
343            for (i = 0; i < NCH(n); i++) {
344                if (i > 0)
345                    printf(",");
346                dumptree(g, CHILD(n, i));
347            }
348            printf(")");
349        }
350    }
351}
352
353void
354showtree(grammar *g, node *n)
355{
356    int i;
357
358    if (n == NULL)
359        return;
360    if (ISNONTERMINAL(TYPE(n))) {
361        for (i = 0; i < NCH(n); i++)
362            showtree(g, CHILD(n, i));
363    }
364    else if (ISTERMINAL(TYPE(n))) {
365        printf("%s", _PyParser_TokenNames[TYPE(n)]);
366        if (TYPE(n) == NUMBER || TYPE(n) == NAME)
367            printf("(%s)", STR(n));
368        printf(" ");
369    }
370    else
371        printf("? ");
372}
373
374void
375printtree(parser_state *ps)
376{
377    if (Py_DebugFlag) {
378        printf("Parse tree:\n");
379        dumptree(ps->p_grammar, ps->p_tree);
380        printf("\n");
381        printf("Tokens:\n");
382        showtree(ps->p_grammar, ps->p_tree);
383        printf("\n");
384    }
385    printf("Listing:\n");
386    PyNode_ListTree(ps->p_tree);
387    printf("\n");
388}
389
390#endif /* Py_DEBUG */
391
392/*
393
394Description
395-----------
396
397The parser's interface is different than usual: the function addtoken()
398must be called for each token in the input.  This makes it possible to
399turn it into an incremental parsing system later.  The parsing system
400constructs a parse tree as it goes.
401
402A parsing rule is represented as a Deterministic Finite-state Automaton
403(DFA).  A node in a DFA represents a state of the parser; an arc represents
404a transition.  Transitions are either labeled with terminal symbols or
405with non-terminals.  When the parser decides to follow an arc labeled
406with a non-terminal, it is invoked recursively with the DFA representing
407the parsing rule for that as its initial state; when that DFA accepts,
408the parser that invoked it continues.  The parse tree constructed by the
409recursively called parser is inserted as a child in the current parse tree.
410
411The DFA's can be constructed automatically from a more conventional
412language description.  An extended LL(1) grammar (ELL(1)) is suitable.
413Certain restrictions make the parser's life easier: rules that can produce
414the empty string should be outlawed (there are other ways to put loops
415or optional parts in the language).  To avoid the need to construct
416FIRST sets, we can require that all but the last alternative of a rule
417(really: arc going out of a DFA's state) must begin with a terminal
418symbol.
419
420As an example, consider this grammar:
421
422expr:   term (OP term)*
423term:   CONSTANT | '(' expr ')'
424
425The DFA corresponding to the rule for expr is:
426
427------->.---term-->.------->
428    ^          |
429    |          |
430    \----OP----/
431
432The parse tree generated for the input a+b is:
433
434(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
435
436*/
437