14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser implementation */
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* For a description, see the comments at end of this file */
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* XXX To do: error recovery */
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "Python.h"
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h"
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h"
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h"
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h"
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "parser.h"
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "errcode.h"
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_DebugFlag;
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define D(x) if (!Py_DebugFlag); else x
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define D(x)
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* STACK DATA TYPE */
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void s_reset(stack *);
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_reset(stack *s)
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s->s_top = &s->s_base[MAXSTACK];
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_push(register stack *s, dfa *d, node *parent)
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    register stackentry *top;
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (s->s_top == s->s_base) {
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fprintf(stderr, "s_push: parser stack overflow\n");
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return E_NOMEM;
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    top = --s->s_top;
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    top->s_dfa = d;
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    top->s_parent = parent;
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    top->s_state = 0;
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return 0;
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_pop(register stack *s)
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (s_empty(s))
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("s_pop: parser stack underflow -- FATAL");
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s->s_top++;
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else /* !Py_DEBUG */
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define s_pop(s) (s)->s_top++
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER CREATION */
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmparser_state *
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_New(grammar *g, int start)
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    parser_state *ps;
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (!g->g_accel)
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        PyGrammar_AddAccelerators(g);
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (ps == NULL)
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return NULL;
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ps->p_grammar = g;
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ps->p_flags = 0;
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ps->p_tree = PyNode_New(start);
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (ps->p_tree == NULL) {
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        PyMem_FREE(ps);
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return NULL;
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s_reset(&ps->p_stack);
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return ps;
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_Delete(parser_state *ps)
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* NB If you want to save the parse tree,
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       you must set p_tree to NULL before calling delparser! */
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyNode_Free(ps->p_tree);
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyMem_FREE(ps);
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER STACK OPERATIONS */
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmshift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int err;
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    assert(!s_empty(s));
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (err)
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return err;
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s->s_top->s_state = newstate;
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return 0;
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmpush(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int err;
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    register node *n;
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = s->s_top->s_parent;
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    assert(!s_empty(s));
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (err)
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return err;
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s->s_top->s_state = newstate;
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return s_push(s, d, CHILD(n, NCH(n)-1));
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER PROPER */
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclassify(parser_state *ps, int type, char *str)
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    grammar *g = ps->p_grammar;
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    register int n = g->g_ll.ll_nlabels;
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (type == NAME) {
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register char *s = str;
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register label *l = g->g_ll.ll_label;
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register int i;
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = n; i > 0; i--, l++) {
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (l->lb_type != NAME || l->lb_str == NULL ||
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                l->lb_str[0] != s[0] ||
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                strcmp(l->lb_str, s) != 0)
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue;
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                s[0] == 'p' && strcmp(s, "print") == 0) {
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                break; /* no longer a keyword */
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            D(printf("It's a keyword\n"));
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return n - i;
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    {
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register label *l = g->g_ll.ll_label;
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register int i;
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = n; i > 0; i--, l++) {
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (l->lb_type == type && l->lb_str == NULL) {
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                D(printf("It's a token we know\n"));
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return n - i;
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    D(printf("Illegal token\n"));
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return -1;
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfuture_hack(parser_state *ps)
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    node *n = ps->p_stack.s_top->s_parent;
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    node *ch, *cch;
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* from __future__ import ..., must have at least 4 children */
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = CHILD(n, 0);
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (NCH(n) < 4)
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ch = CHILD(n, 0);
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ch = CHILD(n, 1);
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ch = CHILD(n, 3);
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* ch can be a star, a parenthesis or import_as_names */
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (TYPE(ch) == STAR)
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (TYPE(ch) == LPAR)
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ch = CHILD(n, 4);
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < NCH(ch); i += 2) {
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        cch = CHILD(ch, i);
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            char *str_ch = STR(CHILD(cch, 0));
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif /* future keyword */
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmint
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_AddToken(register parser_state *ps, register int type, char *str,
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                  int lineno, int col_offset, int *expected_ret)
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    register int ilabel;
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int err;
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* Find out which label this token is */
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ilabel = classify(ps, type, str);
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (ilabel < 0)
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return E_SYNTAX;
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* Loop until the token is shifted or an error occurred */
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (;;) {
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        /* Fetch the current dfa and state */
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register dfa *d = ps->p_stack.s_top->s_dfa;
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        register state *s = &d->d_state[ps->p_stack.s_top->s_state];
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        D(printf(" DFA '%s', state %d:",
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            d->d_name, ps->p_stack.s_top->s_state));
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        /* Check accelerator */
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (s->s_lower <= ilabel && ilabel < s->s_upper) {
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            register int x = s->s_accel[ilabel - s->s_lower];
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (x != -1) {
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (x & (1<<7)) {
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    /* Push non-terminal */
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    int nt = (x >> 8) + NT_OFFSET;
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    int arrow = x & ((1<<7)-1);
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    dfa *d1 = PyGrammar_FindDFA(
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        ps->p_grammar, nt);
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if ((err = push(&ps->p_stack, nt, d1,
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        arrow, lineno, col_offset)) > 0) {
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        D(printf(" MemError: push\n"));
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        return err;
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    }
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    D(printf(" Push ...\n"));
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue;
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Shift the token */
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if ((err = shift(&ps->p_stack, type, str,
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                x, lineno, col_offset)) > 0) {
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    D(printf(" MemError: shift.\n"));
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    return err;
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                D(printf(" Shift.\n"));
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Pop while we are in an accept-only state */
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                while (s = &d->d_state
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                [ps->p_stack.s_top->s_state],
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    s->s_accept && s->s_narcs == 1) {
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    D(printf("  DFA '%s', state %d: "
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             "Direct pop.\n",
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             d->d_name,
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                             ps->p_stack.s_top->s_state));
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (d->d_name[0] == 'i' &&
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        strcmp(d->d_name,
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                           "import_stmt") == 0)
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        future_hack(ps);
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    s_pop(&ps->p_stack);
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (s_empty(&ps->p_stack)) {
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        D(printf("  ACCEPT.\n"));
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        return E_DONE;
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    }
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    d = ps->p_stack.s_top->s_dfa;
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return E_OK;
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (s->s_accept) {
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (d->d_name[0] == 'i' &&
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                strcmp(d->d_name, "import_stmt") == 0)
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                future_hack(ps);
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            /* Pop this dfa and try again */
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            s_pop(&ps->p_stack);
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            D(printf(" Pop ...\n"));
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (s_empty(&ps->p_stack)) {
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                D(printf(" Error: bottom of stack.\n"));
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                return E_SYNTAX;
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        /* Stuck, report syntax error */
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        D(printf(" Error.\n"));
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (expected_ret) {
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (s->s_lower == s->s_upper - 1) {
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Only one possible expected token */
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                *expected_ret = ps->p_grammar->
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    g_ll.ll_label[s->s_lower].lb_type;
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            else
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                *expected_ret = -1;
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return E_SYNTAX;
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* DEBUG OUTPUT */
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumptree(grammar *g, node *n)
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (n == NULL)
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("NIL");
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else {
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        label l;
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        l.lb_type = TYPE(n);
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        l.lb_str = STR(n);
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("%s", PyGrammar_LabelRepr(&l));
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (ISNONTERMINAL(TYPE(n))) {
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("(");
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for (i = 0; i < NCH(n); i++) {
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (i > 0)
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    printf(",");
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                dumptree(g, CHILD(n, i));
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf(")");
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmshowtree(grammar *g, node *n)
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (n == NULL)
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (ISNONTERMINAL(TYPE(n))) {
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = 0; i < NCH(n); i++)
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            showtree(g, CHILD(n, i));
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else if (ISTERMINAL(TYPE(n))) {
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("%s", _PyParser_TokenNames[TYPE(n)]);
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (TYPE(n) == NUMBER || TYPE(n) == NAME)
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("(%s)", STR(n));
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf(" ");
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("? ");
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmprinttree(parser_state *ps)
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (Py_DebugFlag) {
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("Parse tree:\n");
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        dumptree(ps->p_grammar, ps->p_tree);
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("\n");
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("Tokens:\n");
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        showtree(ps->p_grammar, ps->p_tree);
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("\n");
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("Listing:\n");
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyNode_ListTree(ps->p_tree);
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("\n");
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif /* Py_DEBUG */
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/*
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDescription
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm-----------
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe parser's interface is different than usual: the function addtoken()
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmust be called for each token in the input.  This makes it possible to
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmturn it into an incremental parsing system later.  The parsing system
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmconstructs a parse tree as it goes.
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmA parsing rule is represented as a Deterministic Finite-state Automaton
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(DFA).  A node in a DFA represents a state of the parser; an arc represents
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylma transition.  Transitions are either labeled with terminal symbols or
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmwith non-terminals.  When the parser decides to follow an arc labeled
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmwith a non-terminal, it is invoked recursively with the DFA representing
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe parsing rule for that as its initial state; when that DFA accepts,
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe parser that invoked it continues.  The parse tree constructed by the
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmrecursively called parser is inserted as a child in the current parse tree.
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA's can be constructed automatically from a more conventional
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmlanguage description.  An extended LL(1) grammar (ELL(1)) is suitable.
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmCertain restrictions make the parser's life easier: rules that can produce
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe empty string should be outlawed (there are other ways to put loops
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmor optional parts in the language).  To avoid the need to construct
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmFIRST sets, we can require that all but the last alternative of a rule
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(really: arc going out of a DFA's state) must begin with a terminal
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsymbol.
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmAs an example, consider this grammar:
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmexpr:   term (OP term)*
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmterm:   CONSTANT | '(' expr ')'
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA corresponding to the rule for expr is:
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm------->.---term-->.------->
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ^          |
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    |          |
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    \----OP----/
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe parse tree generated for the input a+b is:
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/
437