1c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
2c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Parser implementation */
3c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
4c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* For a description, see the comments at end of this file */
5c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
6c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* XXX To do: error recovery */
7c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
8c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "Python.h"
9c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pgenheaders.h"
10c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "token.h"
11c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "grammar.h"
12c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "node.h"
13c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "parser.h"
14c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "errcode.h"
15c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
16c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
17c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG
18c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielextern int Py_DebugFlag;
19c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define D(x) if (!Py_DebugFlag); else x
20c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else
21c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define D(x)
22c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
23c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
24c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
25c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* STACK DATA TYPE */
26c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
27c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void s_reset(stack *);
28c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
29c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void
30c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_reset(stack *s)
31c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
32c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    s->s_top = &s->s_base[MAXSTACK];
33c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
34c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
35c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
37c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
38c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_push(register stack *s, dfa *d, node *parent)
39c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
40c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    register stackentry *top;
41c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (s->s_top == s->s_base) {
42c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        fprintf(stderr, "s_push: parser stack overflow\n");
43c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return E_NOMEM;
44c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
45c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    top = --s->s_top;
46c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    top->s_dfa = d;
47c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    top->s_parent = parent;
48c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    top->s_state = 0;
49c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 0;
50c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
51c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
52c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG
53c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
54c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void
55c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_pop(register stack *s)
56c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
57c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (s_empty(s))
58c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        Py_FatalError("s_pop: parser stack underflow -- FATAL");
59c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    s->s_top++;
60c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
61c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
62c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else /* !Py_DEBUG */
63c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
64c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define s_pop(s) (s)->s_top++
65c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
66c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
67c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
68c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
69c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER CREATION */
70c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
71c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielparser_state *
72c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_New(grammar *g, int start)
73c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
74c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    parser_state *ps;
75c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
76c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (!g->g_accel)
77c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyGrammar_AddAccelerators(g);
78c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (ps == NULL)
80c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
81c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ps->p_grammar = g;
82c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ps->p_flags = 0;
84c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
85c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ps->p_tree = PyNode_New(start);
86c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (ps->p_tree == NULL) {
87c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        PyMem_FREE(ps);
88c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return NULL;
89c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
90c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    s_reset(&ps->p_stack);
91c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return ps;
93c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
94c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
95c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
96c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_Delete(parser_state *ps)
97c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
98c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* NB If you want to save the parse tree,
99c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel       you must set p_tree to NULL before calling delparser! */
100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyNode_Free(ps->p_tree);
101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyMem_FREE(ps);
102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER STACK OPERATIONS */
106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielshift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int err;
111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(!s_empty(s));
112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (err)
114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return err;
115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    s->s_top->s_state = newstate;
116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return 0;
117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielpush(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int err;
123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    register node *n;
124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    n = s->s_top->s_parent;
125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    assert(!s_empty(s));
126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (err)
128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return err;
129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    s->s_top->s_state = newstate;
130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return s_push(s, d, CHILD(n, NCH(n)-1));
131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER PROPER */
135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int
137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielclassify(parser_state *ps, int type, char *str)
138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    grammar *g = ps->p_grammar;
140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    register int n = g->g_ll.ll_nlabels;
141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (type == NAME) {
143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register char *s = str;
144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register label *l = g->g_ll.ll_label;
145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register int i;
146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (i = n; i > 0; i--, l++) {
147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (l->lb_type != NAME || l->lb_str == NULL ||
148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                l->lb_str[0] != s[0] ||
149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                strcmp(l->lb_str, s) != 0)
150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                continue;
151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                s[0] == 'p' && strcmp(s, "print") == 0) {
154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                break; /* no longer a keyword */
155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            D(printf("It's a keyword\n"));
158c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            return n - i;
159c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
160c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
161c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
162c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    {
163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register label *l = g->g_ll.ll_label;
164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register int i;
165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (i = n; i > 0; i--, l++) {
166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (l->lb_type == type && l->lb_str == NULL) {
167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                D(printf("It's a token we know\n"));
168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return n - i;
169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    D(printf("Illegal token\n"));
174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    return -1;
175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
177c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
178c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void
179c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielfuture_hack(parser_state *ps)
180c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
181c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    node *n = ps->p_stack.s_top->s_parent;
182c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    node *ch, *cch;
183c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int i;
184c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
185c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* from __future__ import ..., must have at least 4 children */
186c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    n = CHILD(n, 0);
187c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (NCH(n) < 4)
188c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return;
189c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ch = CHILD(n, 0);
190c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
191c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return;
192c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ch = CHILD(n, 1);
193c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
194c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
195c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return;
196c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ch = CHILD(n, 3);
197c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* ch can be a star, a parenthesis or import_as_names */
198c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (TYPE(ch) == STAR)
199c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return;
200c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (TYPE(ch) == LPAR)
201c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        ch = CHILD(n, 4);
202c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
203c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (i = 0; i < NCH(ch); i += 2) {
204c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        cch = CHILD(ch, i);
205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            char *str_ch = STR(CHILD(cch, 0));
207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* future keyword */
218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint
220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_AddToken(register parser_state *ps, register int type, char *str,
221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                  int lineno, int col_offset, int *expected_ret)
222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    register int ilabel;
224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int err;
225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Find out which label this token is */
229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ilabel = classify(ps, type, str);
230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (ilabel < 0)
231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return E_SYNTAX;
232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    /* Loop until the token is shifted or an error occurred */
234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    for (;;) {
235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* Fetch the current dfa and state */
236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register dfa *d = ps->p_stack.s_top->s_dfa;
237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        register state *s = &d->d_state[ps->p_stack.s_top->s_state];
238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        D(printf(" DFA '%s', state %d:",
240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            d->d_name, ps->p_stack.s_top->s_state));
241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* Check accelerator */
243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (s->s_lower <= ilabel && ilabel < s->s_upper) {
244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            register int x = s->s_accel[ilabel - s->s_lower];
245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (x != -1) {
246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (x & (1<<7)) {
247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    /* Push non-terminal */
248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    int nt = (x >> 8) + NT_OFFSET;
249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    int arrow = x & ((1<<7)-1);
250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    dfa *d1 = PyGrammar_FindDFA(
251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        ps->p_grammar, nt);
252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if ((err = push(&ps->p_stack, nt, d1,
253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        arrow, lineno, col_offset)) > 0) {
254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        D(printf(" MemError: push\n"));
255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return err;
256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    }
257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    D(printf(" Push ...\n"));
258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    continue;
259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Shift the token */
262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if ((err = shift(&ps->p_stack, type, str,
263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                x, lineno, col_offset)) > 0) {
264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    D(printf(" MemError: shift.\n"));
265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    return err;
266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                D(printf(" Shift.\n"));
268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Pop while we are in an accept-only state */
269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                while (s = &d->d_state
270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                                [ps->p_stack.s_top->s_state],
271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    s->s_accept && s->s_narcs == 1) {
272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    D(printf("  DFA '%s', state %d: "
273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                             "Direct pop.\n",
274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                             d->d_name,
275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                             ps->p_stack.s_top->s_state));
276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (d->d_name[0] == 'i' &&
278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        strcmp(d->d_name,
279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                           "import_stmt") == 0)
280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        future_hack(ps);
281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    s_pop(&ps->p_stack);
283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    if (s_empty(&ps->p_stack)) {
284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        D(printf("  ACCEPT.\n"));
285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                        return E_DONE;
286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    }
287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    d = ps->p_stack.s_top->s_dfa;
288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                }
289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return E_OK;
290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (s->s_accept) {
294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (d->d_name[0] == 'i' &&
296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                strcmp(d->d_name, "import_stmt") == 0)
297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                future_hack(ps);
298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif
299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            /* Pop this dfa and try again */
300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            s_pop(&ps->p_stack);
301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            D(printf(" Pop ...\n"));
302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (s_empty(&ps->p_stack)) {
303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                D(printf(" Error: bottom of stack.\n"));
304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                return E_SYNTAX;
305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            continue;
307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        /* Stuck, report syntax error */
310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        D(printf(" Error.\n"));
311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (expected_ret) {
312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            if (s->s_lower == s->s_upper - 1) {
313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                /* Only one possible expected token */
314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                *expected_ret = ps->p_grammar->
315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    g_ll.ll_label[s->s_lower].lb_type;
316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            else
318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                *expected_ret = -1;
319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return E_SYNTAX;
321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG
326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* DEBUG OUTPUT */
328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldumptree(grammar *g, node *n)
331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int i;
333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (n == NULL)
335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("NIL");
336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    else {
337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        label l;
338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        l.lb_type = TYPE(n);
339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        l.lb_str = STR(n);
340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("%s", PyGrammar_LabelRepr(&l));
341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (ISNONTERMINAL(TYPE(n))) {
342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            printf("(");
343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            for (i = 0; i < NCH(n); i++) {
344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                if (i > 0)
345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                    printf(",");
346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel                dumptree(g, CHILD(n, i));
347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            }
348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            printf(")");
349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        }
350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielshowtree(grammar *g, node *n)
355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    int i;
357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (n == NULL)
359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        return;
360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (ISNONTERMINAL(TYPE(n))) {
361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        for (i = 0; i < NCH(n); i++)
362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            showtree(g, CHILD(n, i));
363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    else if (ISTERMINAL(TYPE(n))) {
365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("%s", _PyParser_TokenNames[TYPE(n)]);
366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        if (TYPE(n) == NUMBER || TYPE(n) == NAME)
367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel            printf("(%s)", STR(n));
368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf(" ");
369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    else
371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("? ");
372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid
375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielprinttree(parser_state *ps)
376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{
377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    if (Py_DebugFlag) {
378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("Parse tree:\n");
379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        dumptree(ps->p_grammar, ps->p_tree);
380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("\n");
381c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("Tokens:\n");
382c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        showtree(ps->p_grammar, ps->p_tree);
383c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel        printf("\n");
384c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    }
385c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    printf("Listing:\n");
386c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    PyNode_ListTree(ps->p_tree);
387c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    printf("\n");
388c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel}
389c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
390c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* Py_DEBUG */
391c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
392c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/*
393c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
394c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielDescription
395c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel-----------
396c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
397c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe parser's interface is different than usual: the function addtoken()
398c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielmust be called for each token in the input.  This makes it possible to
399c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielturn it into an incremental parsing system later.  The parsing system
400c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielconstructs a parse tree as it goes.
401c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
402c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielA parsing rule is represented as a Deterministic Finite-state Automaton
403c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(DFA).  A node in a DFA represents a state of the parser; an arc represents
404c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniela transition.  Transitions are either labeled with terminal symbols or
405c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielwith non-terminals.  When the parser decides to follow an arc labeled
406c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielwith a non-terminal, it is invoked recursively with the DFA representing
407c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe parsing rule for that as its initial state; when that DFA accepts,
408c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe parser that invoked it continues.  The parse tree constructed by the
409c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielrecursively called parser is inserted as a child in the current parse tree.
410c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
411c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe DFA's can be constructed automatically from a more conventional
412c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniellanguage description.  An extended LL(1) grammar (ELL(1)) is suitable.
413c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielCertain restrictions make the parser's life easier: rules that can produce
414c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe empty string should be outlawed (there are other ways to put loops
415c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielor optional parts in the language).  To avoid the need to construct
416c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielFIRST sets, we can require that all but the last alternative of a rule
417c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(really: arc going out of a DFA's state) must begin with a terminal
418c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielsymbol.
419c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
420c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielAs an example, consider this grammar:
421c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
422c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielexpr:   term (OP term)*
423c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielterm:   CONSTANT | '(' expr ')'
424c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
425c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe DFA corresponding to the rule for expr is:
426c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
427c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel------->.---term-->.------->
428c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    ^          |
429c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    |          |
430c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel    \----OP----/
431c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
432c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe parse tree generated for the input a+b is:
433c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
434c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
435c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel
436c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel*/
437