14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser implementation */ 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* For a description, see the comments at end of this file */ 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* XXX To do: error recovery */ 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "Python.h" 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h" 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h" 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h" 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h" 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "parser.h" 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "errcode.h" 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_DebugFlag; 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define D(x) if (!Py_DebugFlag); else x 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define D(x) 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* STACK DATA TYPE */ 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void s_reset(stack *); 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_reset(stack *s) 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_top = &s->s_base[MAXSTACK]; 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_push(register stack *s, dfa *d, node *parent) 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register stackentry *top; 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_top == s->s_base) { 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fprintf(stderr, "s_push: parser stack overflow\n"); 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_NOMEM; 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm top = --s->s_top; 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm top->s_dfa = d; 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm top->s_parent = parent; 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm top->s_state = 0; 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 0; 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylms_pop(register stack *s) 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s_empty(s)) 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("s_pop: parser stack underflow -- FATAL"); 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_top++; 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else /* !Py_DEBUG */ 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define s_pop(s) (s)->s_top++ 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER CREATION */ 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmparser_state * 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_New(grammar *g, int start) 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm parser_state *ps; 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (!g->g_accel) 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyGrammar_AddAccelerators(g); 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ps == NULL) 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NULL; 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_grammar = g; 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_flags = 0; 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_tree = PyNode_New(start); 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ps->p_tree == NULL) { 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyMem_FREE(ps); 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NULL; 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s_reset(&ps->p_stack); 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return ps; 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_Delete(parser_state *ps) 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* NB If you want to save the parse tree, 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm you must set p_tree to NULL before calling delparser! */ 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyNode_Free(ps->p_tree); 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyMem_FREE(ps); 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER STACK OPERATIONS */ 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmshift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int err; 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert(!s_empty(s)); 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (err) 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return err; 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_top->s_state = newstate; 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 0; 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmpush(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int err; 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register node *n; 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = s->s_top->s_parent; 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm assert(!s_empty(s)); 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (err) 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return err; 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_top->s_state = newstate; 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return s_push(s, d, CHILD(n, NCH(n)-1)); 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PARSER PROPER */ 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclassify(parser_state *ps, int type, char *str) 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm grammar *g = ps->p_grammar; 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register int n = g->g_ll.ll_nlabels; 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (type == NAME) { 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register char *s = str; 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register label *l = g->g_ll.ll_label; 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register int i; 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = n; i > 0; i--, l++) { 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (l->lb_type != NAME || l->lb_str == NULL || 1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l->lb_str[0] != s[0] || 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strcmp(l->lb_str, s) != 0) 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s[0] == 'p' && strcmp(s, "print") == 0) { 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break; /* no longer a keyword */ 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf("It's a keyword\n")); 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return n - i; 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm { 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register label *l = g->g_ll.ll_label; 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register int i; 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = n; i > 0; i--, l++) { 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (l->lb_type == type && l->lb_str == NULL) { 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf("It's a token we know\n")); 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return n - i; 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf("Illegal token\n")); 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return -1; 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfuture_hack(parser_state *ps) 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm node *n = ps->p_stack.s_top->s_parent; 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm node *ch, *cch; 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* from __future__ import ..., must have at least 4 children */ 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = CHILD(n, 0); 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (NCH(n) < 4) 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ch = CHILD(n, 0); 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ch = CHILD(n, 1); 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strcmp(STR(CHILD(ch, 0)), "__future__") != 0) 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ch = CHILD(n, 3); 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* ch can be a star, a parenthesis or import_as_names */ 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (TYPE(ch) == STAR) 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (TYPE(ch) == LPAR) 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ch = CHILD(n, 4); 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < NCH(ch); i += 2) { 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm cch = CHILD(ch, i); 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm char *str_ch = STR(CHILD(cch, 0)); 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_flags |= CO_FUTURE_WITH_STATEMENT; 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif /* future keyword */ 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmint 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyParser_AddToken(register parser_state *ps, register int type, char *str, 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int lineno, int col_offset, int *expected_ret) 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register int ilabel; 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int err; 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Find out which label this token is */ 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ilabel = classify(ps, type, str); 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ilabel < 0) 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_SYNTAX; 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Loop until the token is shifted or an error occurred */ 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (;;) { 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Fetch the current dfa and state */ 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register dfa *d = ps->p_stack.s_top->s_dfa; 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register state *s = &d->d_state[ps->p_stack.s_top->s_state]; 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" DFA '%s', state %d:", 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d->d_name, ps->p_stack.s_top->s_state)); 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Check accelerator */ 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_lower <= ilabel && ilabel < s->s_upper) { 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm register int x = s->s_accel[ilabel - s->s_lower]; 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (x != -1) { 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (x & (1<<7)) { 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Push non-terminal */ 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nt = (x >> 8) + NT_OFFSET; 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int arrow = x & ((1<<7)-1); 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dfa *d1 = PyGrammar_FindDFA( 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_grammar, nt); 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ((err = push(&ps->p_stack, nt, d1, 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm arrow, lineno, col_offset)) > 0) { 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" MemError: push\n")); 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return err; 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" Push ...\n")); 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Shift the token */ 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if ((err = shift(&ps->p_stack, type, str, 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm x, lineno, col_offset)) > 0) { 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" MemError: shift.\n")); 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return err; 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" Shift.\n")); 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Pop while we are in an accept-only state */ 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while (s = &d->d_state 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm [ps->p_stack.s_top->s_state], 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accept && s->s_narcs == 1) { 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" DFA '%s', state %d: " 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "Direct pop.\n", 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d->d_name, 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ps->p_stack.s_top->s_state)); 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (d->d_name[0] == 'i' && 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strcmp(d->d_name, 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "import_stmt") == 0) 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm future_hack(ps); 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s_pop(&ps->p_stack); 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s_empty(&ps->p_stack)) { 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" ACCEPT.\n")); 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_DONE; 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d = ps->p_stack.s_top->s_dfa; 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_OK; 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_accept) { 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (d->d_name[0] == 'i' && 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm strcmp(d->d_name, "import_stmt") == 0) 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm future_hack(ps); 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Pop this dfa and try again */ 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s_pop(&ps->p_stack); 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" Pop ...\n")); 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s_empty(&ps->p_stack)) { 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" Error: bottom of stack.\n")); 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_SYNTAX; 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Stuck, report syntax error */ 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm D(printf(" Error.\n")); 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (expected_ret) { 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_lower == s->s_upper - 1) { 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Only one possible expected token */ 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *expected_ret = ps->p_grammar-> 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g_ll.ll_label[s->s_lower].lb_type; 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *expected_ret = -1; 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return E_SYNTAX; 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* DEBUG OUTPUT */ 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumptree(grammar *g, node *n) 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n == NULL) 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("NIL"); 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else { 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm label l; 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l.lb_type = TYPE(n); 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm l.lb_str = STR(n); 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("%s", PyGrammar_LabelRepr(&l)); 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ISNONTERMINAL(TYPE(n))) { 3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("("); 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < NCH(n); i++) { 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (i > 0) 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(","); 3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dumptree(g, CHILD(n, i)); 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(")"); 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmshowtree(grammar *g, node *n) 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n == NULL) 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ISNONTERMINAL(TYPE(n))) { 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < NCH(n); i++) 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm showtree(g, CHILD(n, i)); 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else if (ISTERMINAL(TYPE(n))) { 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("%s", _PyParser_TokenNames[TYPE(n)]); 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (TYPE(n) == NUMBER || TYPE(n) == NAME) 3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("(%s)", STR(n)); 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(" "); 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("? "); 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmprinttree(parser_state *ps) 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) { 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Parse tree:\n"); 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dumptree(ps->p_grammar, ps->p_tree); 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("\n"); 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Tokens:\n"); 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm showtree(ps->p_grammar, ps->p_tree); 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("\n"); 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Listing:\n"); 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyNode_ListTree(ps->p_tree); 3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("\n"); 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif /* Py_DEBUG */ 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* 3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDescription 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm----------- 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe parser's interface is different than usual: the function addtoken() 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmust be called for each token in the input. This makes it possible to 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmturn it into an incremental parsing system later. The parsing system 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmconstructs a parse tree as it goes. 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmA parsing rule is represented as a Deterministic Finite-state Automaton 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(DFA). A node in a DFA represents a state of the parser; an arc represents 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylma transition. Transitions are either labeled with terminal symbols or 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmwith non-terminals. When the parser decides to follow an arc labeled 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmwith a non-terminal, it is invoked recursively with the DFA representing 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe parsing rule for that as its initial state; when that DFA accepts, 4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe parser that invoked it continues. The parse tree constructed by the 4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmrecursively called parser is inserted as a child in the current parse tree. 4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA's can be constructed automatically from a more conventional 4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmlanguage description. An extended LL(1) grammar (ELL(1)) is suitable. 4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmCertain restrictions make the parser's life easier: rules that can produce 4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthe empty string should be outlawed (there are other ways to put loops 4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmor optional parts in the language). To avoid the need to construct 4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmFIRST sets, we can require that all but the last alternative of a rule 4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(really: arc going out of a DFA's state) must begin with a terminal 4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsymbol. 4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmAs an example, consider this grammar: 4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmexpr: term (OP term)* 4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmterm: CONSTANT | '(' expr ')' 4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA corresponding to the rule for expr is: 4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm------->.---term-->.-------> 4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ^ | 4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm | | 4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm \----OP----/ 4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe parse tree generated for the input a+b is: 4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) 4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/ 437