1c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 2c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* Parser implementation */ 3c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 4c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* For a description, see the comments at end of this file */ 5c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 6c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* XXX To do: error recovery */ 7c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 8c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "Python.h" 9c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "pgenheaders.h" 10c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "token.h" 11c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "grammar.h" 12c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "node.h" 13c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "parser.h" 14c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#include "errcode.h" 15c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 16c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 17c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG 18c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielextern int Py_DebugFlag; 19c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define D(x) if (!Py_DebugFlag); else x 20c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else 21c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define D(x) 22c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 23c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 24c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 25c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* STACK DATA TYPE */ 26c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 27c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void s_reset(stack *); 28c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 29c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void 30c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_reset(stack *s) 31c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 32c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s->s_top = &s->s_base[MAXSTACK]; 33c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 34c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 35c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) 36c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 37c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 38c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_push(register stack *s, dfa *d, node *parent) 39c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 40c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register stackentry *top; 41c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s->s_top == s->s_base) { 42c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel fprintf(stderr, "s_push: parser stack overflow\n"); 43c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_NOMEM; 44c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 45c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel top = --s->s_top; 46c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel top->s_dfa = d; 47c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel top->s_parent = parent; 48c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel top->s_state = 0; 49c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 50c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 51c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 52c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG 53c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 54c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void 55c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniels_pop(register stack *s) 56c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 57c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s_empty(s)) 58c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel Py_FatalError("s_pop: parser stack underflow -- FATAL"); 59c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s->s_top++; 60c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 61c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 62c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#else /* !Py_DEBUG */ 63c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 64c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#define s_pop(s) (s)->s_top++ 65c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 66c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 67c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 68c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 69c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER CREATION */ 70c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 71c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielparser_state * 72c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_New(grammar *g, int start) 73c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 74c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel parser_state *ps; 75c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 76c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (!g->g_accel) 77c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyGrammar_AddAccelerators(g); 78c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); 79c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ps == NULL) 80c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 81c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_grammar = g; 82c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 83c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_flags = 0; 84c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 85c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_tree = PyNode_New(start); 86c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ps->p_tree == NULL) { 87c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(ps); 88c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return NULL; 89c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 90c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s_reset(&ps->p_stack); 91c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); 92c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return ps; 93c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 94c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 95c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 96c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_Delete(parser_state *ps) 97c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 98c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* NB If you want to save the parse tree, 99c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel you must set p_tree to NULL before calling delparser! */ 100c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyNode_Free(ps->p_tree); 101c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyMem_FREE(ps); 102c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 103c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 104c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 105c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER STACK OPERATIONS */ 106c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 107c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 108c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielshift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) 109c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 110c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int err; 111c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(!s_empty(s)); 112c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); 113c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (err) 114c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return err; 115c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s->s_top->s_state = newstate; 116c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return 0; 117c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 118c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 119c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 120c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielpush(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) 121c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 122c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int err; 123c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register node *n; 124c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel n = s->s_top->s_parent; 125c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel assert(!s_empty(s)); 126c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); 127c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (err) 128c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return err; 129c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s->s_top->s_state = newstate; 130c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return s_push(s, d, CHILD(n, NCH(n)-1)); 131c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 132c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 133c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 134c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* PARSER PROPER */ 135c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 136c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic int 137c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielclassify(parser_state *ps, int type, char *str) 138c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 139c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel grammar *g = ps->p_grammar; 140c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int n = g->g_ll.ll_nlabels; 141c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 142c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (type == NAME) { 143c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register char *s = str; 144c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register label *l = g->g_ll.ll_label; 145c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int i; 146c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = n; i > 0; i--, l++) { 147c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (l->lb_type != NAME || l->lb_str == NULL || 148c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel l->lb_str[0] != s[0] || 149c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(l->lb_str, s) != 0) 150c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel continue; 151c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 152c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && 153c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s[0] == 'p' && strcmp(s, "print") == 0) { 154c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel break; /* no longer a keyword */ 155c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 156c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 157c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf("It's a keyword\n")); 158c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return n - i; 159c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 160c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 161c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 162c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel { 163c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register label *l = g->g_ll.ll_label; 164c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int i; 165c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = n; i > 0; i--, l++) { 166c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (l->lb_type == type && l->lb_str == NULL) { 167c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf("It's a token we know\n")); 168c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return n - i; 169c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 170c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 171c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 172c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 173c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf("Illegal token\n")); 174c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return -1; 175c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 176c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 177c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 178c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielstatic void 179c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielfuture_hack(parser_state *ps) 180c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 181c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel node *n = ps->p_stack.s_top->s_parent; 182c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel node *ch, *cch; 183c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int i; 184c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 185c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* from __future__ import ..., must have at least 4 children */ 186c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel n = CHILD(n, 0); 187c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (NCH(n) < 4) 188c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return; 189c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch = CHILD(n, 0); 190c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) 191c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return; 192c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch = CHILD(n, 1); 193c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && 194c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(STR(CHILD(ch, 0)), "__future__") != 0) 195c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return; 196c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch = CHILD(n, 3); 197c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* ch can be a star, a parenthesis or import_as_names */ 198c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (TYPE(ch) == STAR) 199c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return; 200c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (TYPE(ch) == LPAR) 201c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ch = CHILD(n, 4); 202c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 203c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < NCH(ch); i += 2) { 204c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel cch = CHILD(ch, i); 205c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { 206c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel char *str_ch = STR(CHILD(cch, 0)); 207c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { 208c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_flags |= CO_FUTURE_WITH_STATEMENT; 209c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { 210c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 211c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { 212c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 213c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 214c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 215c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 216c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 217c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* future keyword */ 218c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 219c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielint 220c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielPyParser_AddToken(register parser_state *ps, register int type, char *str, 221c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int lineno, int col_offset, int *expected_ret) 222c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 223c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int ilabel; 224c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int err; 225c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 226c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); 227c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 228c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Find out which label this token is */ 229c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ilabel = classify(ps, type, str); 230c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ilabel < 0) 231c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_SYNTAX; 232c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 233c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Loop until the token is shifted or an error occurred */ 234c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (;;) { 235c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Fetch the current dfa and state */ 236c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register dfa *d = ps->p_stack.s_top->s_dfa; 237c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register state *s = &d->d_state[ps->p_stack.s_top->s_state]; 238c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 239c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" DFA '%s', state %d:", 240c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel d->d_name, ps->p_stack.s_top->s_state)); 241c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 242c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Check accelerator */ 243c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s->s_lower <= ilabel && ilabel < s->s_upper) { 244c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel register int x = s->s_accel[ilabel - s->s_lower]; 245c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (x != -1) { 246c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (x & (1<<7)) { 247c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Push non-terminal */ 248c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int nt = (x >> 8) + NT_OFFSET; 249c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int arrow = x & ((1<<7)-1); 250c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel dfa *d1 = PyGrammar_FindDFA( 251c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_grammar, nt); 252c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if ((err = push(&ps->p_stack, nt, d1, 253c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel arrow, lineno, col_offset)) > 0) { 254c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" MemError: push\n")); 255c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return err; 256c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 257c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" Push ...\n")); 258c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel continue; 259c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 260c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 261c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Shift the token */ 262c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if ((err = shift(&ps->p_stack, type, str, 263c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel x, lineno, col_offset)) > 0) { 264c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" MemError: shift.\n")); 265c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return err; 266c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 267c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" Shift.\n")); 268c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Pop while we are in an accept-only state */ 269c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel while (s = &d->d_state 270c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel [ps->p_stack.s_top->s_state], 271c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s->s_accept && s->s_narcs == 1) { 272c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" DFA '%s', state %d: " 273c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "Direct pop.\n", 274c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel d->d_name, 275c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ps->p_stack.s_top->s_state)); 276c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 277c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (d->d_name[0] == 'i' && 278c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(d->d_name, 279c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel "import_stmt") == 0) 280c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel future_hack(ps); 281c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 282c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s_pop(&ps->p_stack); 283c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s_empty(&ps->p_stack)) { 284c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" ACCEPT.\n")); 285c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_DONE; 286c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 287c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel d = ps->p_stack.s_top->s_dfa; 288c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 289c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_OK; 290c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 291c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 292c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 293c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s->s_accept) { 294c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 295c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (d->d_name[0] == 'i' && 296c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel strcmp(d->d_name, "import_stmt") == 0) 297c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel future_hack(ps); 298c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif 299c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Pop this dfa and try again */ 300c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel s_pop(&ps->p_stack); 301c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" Pop ...\n")); 302c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s_empty(&ps->p_stack)) { 303c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" Error: bottom of stack.\n")); 304c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_SYNTAX; 305c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 306c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel continue; 307c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 308c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 309c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Stuck, report syntax error */ 310c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel D(printf(" Error.\n")); 311c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (expected_ret) { 312c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (s->s_lower == s->s_upper - 1) { 313c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel /* Only one possible expected token */ 314c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *expected_ret = ps->p_grammar-> 315c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel g_ll.ll_label[s->s_lower].lb_type; 316c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 317c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 318c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel *expected_ret = -1; 319c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 320c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return E_SYNTAX; 321c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 322c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 323c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 324c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 325c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#ifdef Py_DEBUG 326c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 327c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* DEBUG OUTPUT */ 328c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 329c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 330c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanieldumptree(grammar *g, node *n) 331c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 332c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int i; 333c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 334c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (n == NULL) 335c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("NIL"); 336c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else { 337c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel label l; 338c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel l.lb_type = TYPE(n); 339c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel l.lb_str = STR(n); 340c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("%s", PyGrammar_LabelRepr(&l)); 341c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ISNONTERMINAL(TYPE(n))) { 342c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("("); 343c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < NCH(n); i++) { 344c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (i > 0) 345c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf(","); 346c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel dumptree(g, CHILD(n, i)); 347c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 348c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf(")"); 349c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 350c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 351c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 352c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 353c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 354c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielshowtree(grammar *g, node *n) 355c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 356c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel int i; 357c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 358c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (n == NULL) 359c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel return; 360c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (ISNONTERMINAL(TYPE(n))) { 361c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel for (i = 0; i < NCH(n); i++) 362c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel showtree(g, CHILD(n, i)); 363c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 364c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else if (ISTERMINAL(TYPE(n))) { 365c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("%s", _PyParser_TokenNames[TYPE(n)]); 366c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (TYPE(n) == NUMBER || TYPE(n) == NAME) 367c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("(%s)", STR(n)); 368c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf(" "); 369c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 370c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel else 371c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("? "); 372c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 373c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 374c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielvoid 375c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielprinttree(parser_state *ps) 376c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel{ 377c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel if (Py_DebugFlag) { 378c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("Parse tree:\n"); 379c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel dumptree(ps->p_grammar, ps->p_tree); 380c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("\n"); 381c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("Tokens:\n"); 382c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel showtree(ps->p_grammar, ps->p_tree); 383c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("\n"); 384c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel } 385c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("Listing:\n"); 386c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel PyNode_ListTree(ps->p_tree); 387c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel printf("\n"); 388c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel} 389c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 390c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel#endif /* Py_DEBUG */ 391c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 392c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel/* 393c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 394c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielDescription 395c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel----------- 396c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 397c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe parser's interface is different than usual: the function addtoken() 398c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielmust be called for each token in the input. This makes it possible to 399c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielturn it into an incremental parsing system later. The parsing system 400c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielconstructs a parse tree as it goes. 401c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 402c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielA parsing rule is represented as a Deterministic Finite-state Automaton 403c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(DFA). A node in a DFA represents a state of the parser; an arc represents 404c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniela transition. Transitions are either labeled with terminal symbols or 405c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielwith non-terminals. When the parser decides to follow an arc labeled 406c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielwith a non-terminal, it is invoked recursively with the DFA representing 407c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe parsing rule for that as its initial state; when that DFA accepts, 408c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe parser that invoked it continues. The parse tree constructed by the 409c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielrecursively called parser is inserted as a child in the current parse tree. 410c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 411c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe DFA's can be constructed automatically from a more conventional 412c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniellanguage description. An extended LL(1) grammar (ELL(1)) is suitable. 413c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielCertain restrictions make the parser's life easier: rules that can produce 414c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielthe empty string should be outlawed (there are other ways to put loops 415c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielor optional parts in the language). To avoid the need to construct 416c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielFIRST sets, we can require that all but the last alternative of a rule 417c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(really: arc going out of a DFA's state) must begin with a terminal 418c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielsymbol. 419c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 420c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielAs an example, consider this grammar: 421c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 422c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielexpr: term (OP term)* 423c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielterm: CONSTANT | '(' expr ')' 424c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 425c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe DFA corresponding to the rule for expr is: 426c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 427c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel------->.---term-->.-------> 428c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel ^ | 429c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel | | 430c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel \----OP----/ 431c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 432c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDanielThe parse tree generated for the input a+b is: 433c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 434c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) 435c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel 436c8042e10763bca064df257547d04ae3dfcdfaf91Daryl McDaniel*/ 437