14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser generator */ 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* For a description, see the comments at end of this file */ 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "Python.h" 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h" 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h" 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h" 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h" 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "metagrammar.h" 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgen.h" 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_DebugFlag; 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfaarc { 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ar_label; 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ar_arrow; 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfaarc; 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfastate { 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int st_narcs; 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfaarc *st_arc; 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfastate; 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfa { 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nf_type; 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm char *nf_name; 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nf_nstates; 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *nf_state; 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nf_start, nf_finish; 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfa; 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */ 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_rhs(labellist *ll, 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf, node *n, int *pa, int *pb); 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_alt(labellist *ll, 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf, node *n, int *pa, int *pb); 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_item(labellist *ll, 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf, node *n, int *pa, int *pb); 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_atom(labellist *ll, 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf, node *n, int *pa, int *pb); 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfastate(nfa *nf) 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *st; 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sizeof(nfastate) * (nf->nf_nstates + 1)); 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (nf->nf_state == NULL) 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("out of mem"); 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st = &nf->nf_state[nf->nf_nstates++]; 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st->st_narcs = 0; 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st->st_arc = NULL; 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return st - nf->nf_state; 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfaarc(nfa *nf, int from, int to, int lbl) 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *st; 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfaarc *ar; 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st = &nf->nf_state[from]; 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sizeof(nfaarc) * (st->st_narcs + 1)); 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (st->st_arc == NULL) 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("out of mem"); 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar = &st->st_arc[st->st_narcs++]; 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar->ar_label = lbl; 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar->ar_arrow = to; 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfa * 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnewnfa(char *name) 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf; 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm static int type = NT_OFFSET; /* All types will be disjunct */ 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (nf == NULL) 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("no mem for new nfa"); 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_type = type++; 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_name = name; /* XXX strdup(name) ??? */ 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_nstates = 0; 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_state = NULL; 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_start = nf->nf_finish = -1; 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return nf; 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfagrammar { 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int gr_nnfas; 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa **gr_nfa; 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm labellist gr_ll; 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfagrammar; 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */ 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_rule(nfagrammar *gr, node *n); 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfagrammar * 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnewnfagrammar(void) 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfagrammar *gr; 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (gr == NULL) 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("no mem for new nfa grammar"); 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_nnfas = 0; 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_nfa = NULL; 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_ll.ll_nlabels = 0; 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_ll.ll_label = NULL; 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return gr; 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfa * 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfa(nfagrammar *gr, char *name) 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf; 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf = newnfa(name); 1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, 1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm sizeof(nfa*) * (gr->gr_nnfas + 1)); 1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (gr->gr_nfa == NULL) 1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("out of mem"); 1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr->gr_nfa[gr->gr_nnfas++] = nf; 1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addlabel(&gr->gr_ll, NAME, nf->nf_name); 1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return nf; 1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG 1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic char REQNFMT[] = "metacompile: less than %d children\n"; 1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define REQN(i, count) \ 1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (i < count) { \ 1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fprintf(stderr, REQNFMT, count); \ 1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("REQN"); \ 1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } else 1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else 1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define REQN(i, count) /* empty */ 1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif 1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfagrammar * 1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmetacompile(node *n) 1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfagrammar *gr; 1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) 1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Compiling (meta-) parse tree into NFA grammar\n"); 1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr = newnfagrammar(); 1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, MSTART); 1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = n->n_nchildren - 1; /* Last child is ENDMARKER */ 1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (; --i >= 0; n++) { 1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n->n_type != NEWLINE) 1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_rule(gr, n); 1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return gr; 1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_rule(nfagrammar *gr, node *n) 1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf; 1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RULE); 1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(n->n_nchildren, 4); 1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, NAME); 1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf = addnfa(gr, n->n_str); 1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, COLON); 1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RHS); 1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); 1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, NEWLINE); 1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int a, b; 1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RHS); 1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = n->n_nchildren; 1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ALT); 1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_alt(ll, nf, n, pa, pb); 1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (--i <= 0) 2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a = *pa; 2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm b = *pb; 2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pa = addnfastate(nf); 2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pb = addnfastate(nf); 2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pa, a, EMPTY); 2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, b, *pb, EMPTY); 2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (; --i >= 0; n++) { 2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, VBAR); 2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm --i; 2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ALT); 2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_alt(ll, nf, n, &a, &b); 2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pa, a, EMPTY); 2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, b, *pb, EMPTY); 2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int a, b; 2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ALT); 2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = n->n_nchildren; 2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ITEM); 2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_item(ll, nf, n, pa, pb); 2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm --i; 2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (; --i >= 0; n++) { 2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ITEM); 2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_item(ll, nf, n, &a, &b); 2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pb, a, EMPTY); 2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pb = b; 2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int a, b; 2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ITEM); 2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = n->n_nchildren; 2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n->n_type == LSQB) { 2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 3); 2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RHS); 2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pa = addnfastate(nf); 2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pb = addnfastate(nf); 2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pa, *pb, EMPTY); 2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_rhs(ll, nf, n, &a, &b); 2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pa, a, EMPTY); 2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, b, *pb, EMPTY); 2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RSQB); 2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else { 2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_atom(ll, nf, n, pa, pb); 2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (--i <= 0) 2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return; 2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pb, *pa, EMPTY); 2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n->n_type == STAR) 2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pb = *pa; 2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else 2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, PLUS); 2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, ATOM); 2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i = n->n_nchildren; 2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 1); 2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n = n->n_child; 2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (n->n_type == LPAR) { 2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQN(i, 3); 2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RHS); 2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm compile_rhs(ll, nf, n, pa, pb); 2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm n++; 2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, RPAR); 2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else if (n->n_type == NAME || n->n_type == STRING) { 2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pa = addnfastate(nf); 2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm *pb = addnfastate(nf); 2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); 3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else 3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm REQ(n, NAME); 3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumpstate(labellist *ll, nfa *nf, int istate) 3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *st; 3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfaarc *ar; 3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("%c%2d%c", 3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm istate == nf->nf_start ? '*' : ' ', 3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm istate, 3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm istate == nf->nf_finish ? '.' : ' '); 3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st = &nf->nf_state[istate]; 3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar = st->st_arc; 3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < st->st_narcs; i++) { 3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (i > 0) 3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("\n "); 3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("-> %2d %s", ar->ar_arrow, 3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); 3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar++; 3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("\n"); 3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumpnfa(labellist *ll, nfa *nf) 3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("NFA '%s' has %d states; start %d, finish %d\n", 3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); 3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < nf->nf_nstates; i++) 3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dumpstate(ll, nf, i); 3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ 3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddclosure(bitset ss, nfa *nf, int istate) 3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (addbit(ss, istate)) { 3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *st = &nf->nf_state[istate]; 3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfaarc *ar = st->st_arc; 3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = st->st_narcs; --i >= 0; ) { 3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ar->ar_label == EMPTY) 3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addclosure(ss, nf, ar->ar_arrow); 3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar++; 3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_arc { 3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm bitset sa_bitset; 3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int sa_arrow; 3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int sa_label; 3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_arc; 3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_state { 3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm bitset ss_ss; 3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ss_narcs; 3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm struct _ss_arc *ss_arc; 3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ss_deleted; 3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ss_finish; 3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ss_rename; 3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_state; 3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_dfa { 3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int sd_nstates; 3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_state *sd_state; 3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_dfa; 3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */ 3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, 3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm labellist *ll, char *msg); 3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void simplify(int xx_nstates, ss_state *xx_state); 3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void convert(dfa *d, int xx_nstates, ss_state *xx_state); 3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmakedfa(nfagrammar *gr, nfa *nf, dfa *d) 3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nbits = nf->nf_nstates; 3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm bitset ss; 3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int xx_nstates; 3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_state *xx_state, *yy; 3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_arc *zz; 3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int istate, jstate, iarc, jarc, ibit; 3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfastate *st; 3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfaarc *ar; 3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss = newbitset(nbits); 3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addclosure(ss, nf, nf->nf_start); 3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); 3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state == NULL) 4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("no mem for xx_state in makedfa"); 4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_nstates = 1; 4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[0]; 4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_ss = ss; 4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_narcs = 0; 4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_arc = NULL; 4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_deleted = 0; 4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_finish = testbit(ss, nf->nf_finish); 4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_finish) 4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Error: nonterminal '%s' may produce empty.\n", 4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf->nf_name); 4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* This algorithm is from a book written before 4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm the invention of structured programming... */ 4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* For each unmarked state... */ 4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (istate = 0; istate < xx_nstates; ++istate) { 4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm size_t size; 4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[istate]; 4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss = yy->ss_ss; 4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* For all its states... */ 4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { 4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (!testbit(ss, ibit)) 4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm st = &nf->nf_state[ibit]; 4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* For all non-empty arcs from this state... */ 4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (iarc = 0; iarc < st->st_narcs; iarc++) { 4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ar = &st->st_arc[iarc]; 4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ar->ar_label == EMPTY) 4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Look up in list of arcs from this state */ 4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { 4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz = &yy->ss_arc[jarc]; 4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ar->ar_label == zz->sa_label) 4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm goto found; 4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Add new arc for this state */ 4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm size = sizeof(ss_arc) * (yy->ss_narcs + 1); 4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_arc = (ss_arc *)PyObject_REALLOC( 4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_arc, size); 4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_arc == NULL) 4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("out of mem"); 4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz = &yy->ss_arc[yy->ss_narcs++]; 4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_label = ar->ar_label; 4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_bitset = newbitset(nbits); 4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_arrow = -1; 4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm found: ; 4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Add destination */ 4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addclosure(zz->sa_bitset, nf, ar->ar_arrow); 4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* Now look up all the arrow states */ 4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { 4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz = &xx_state[istate].ss_arc[jarc]; 4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (jstate = 0; jstate < xx_nstates; jstate++) { 4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (samebitset(zz->sa_bitset, 4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state[jstate].ss_ss, nbits)) { 4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_arrow = jstate; 4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm goto done; 4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm size = sizeof(ss_state) * (xx_nstates + 1); 4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state = (ss_state *)PyObject_REALLOC(xx_state, 4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm size); 4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state == NULL) 4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Py_FatalError("out of mem"); 4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_arrow = xx_nstates; 4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[xx_nstates++]; 4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_ss = zz->sa_bitset; 4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_narcs = 0; 4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_arc = NULL; 4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_deleted = 0; 4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); 4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm done: ; 4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) 4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "before minimizing"); 4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm simplify(xx_nstates, xx_state); 4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) 4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm "after minimizing"); 4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm convert(d, xx_nstates, xx_state); 4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* XXX cleanup */ 4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyObject_FREE(xx_state); 4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmprintssdfa(int xx_nstates, ss_state *xx_state, int nbits, 4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm labellist *ll, char *msg) 4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i, ibit, iarc; 4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_state *yy; 4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_arc *zz; 5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Subset DFA %s\n", msg); 5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < xx_nstates; i++) { 5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[i]; 5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_deleted) 5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(" Subset %d", i); 5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_finish) 5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(" (finish)"); 5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(" { "); 5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (ibit = 0; ibit < nbits; ibit++) { 5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (testbit(yy->ss_ss, ibit)) 5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("%d ", ibit); 5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("}\n"); 5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (iarc = 0; iarc < yy->ss_narcs; iarc++) { 5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz = &yy->ss_arc[iarc]; 5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf(" Arc to state %d, label %s\n", 5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_arrow, 5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyGrammar_LabelRepr( 5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm &ll->ll_label[zz->sa_label])); 5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART THREE -- SIMPLIFY DFA */ 5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Simplify the DFA by repeatedly eliminating states that are 5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm equivalent to another oner. This is NOT Algorithm 3.3 from 5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm [Aho&Ullman 77]. It does not always finds the minimal DFA, 5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm but it does usually make a much smaller one... (For an example 5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm of sub-optimal behavior, try S: x a b+ | y a b+.) 5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/ 5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int 5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsamestate(ss_state *s1, ss_state *s2) 5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) 5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 0; 5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < s1->ss_narcs; i++) { 5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || 5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) 5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 0; 5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return 1; 5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmrenamestates(int xx_nstates, ss_state *xx_state, int from, int to) 5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i, j; 5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) 5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Rename state %d to %d.\n", from, to); 5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < xx_nstates; i++) { 5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state[i].ss_deleted) 5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (j = 0; j < xx_state[i].ss_narcs; j++) { 5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state[i].ss_arc[j].sa_arrow == from) 5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state[i].ss_arc[j].sa_arrow = to; 5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsimplify(int xx_nstates, ss_state *xx_state) 5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int changes; 5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i, j; 5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm do { 5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm changes = 0; 5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 1; i < xx_nstates; i++) { 5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state[i].ss_deleted) 5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (j = 0; j < i; j++) { 5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (xx_state[j].ss_deleted) 5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (samestate(&xx_state[i], &xx_state[j])) { 5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state[i].ss_deleted++; 5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm renamestates(xx_nstates, xx_state, 5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm i, j); 5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm changes++; 5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm break; 5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } while (changes); 5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART FOUR -- GENERATE PARSING TABLES */ 5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Convert the DFA into a grammar that can be used by our parser */ 5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmconvert(dfa *d, int xx_nstates, ss_state *xx_state) 6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i, j; 6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_state *yy; 6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ss_arc *zz; 6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < xx_nstates; i++) { 6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[i]; 6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_deleted) 6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy->ss_rename = addstate(d); 6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < xx_nstates; i++) { 6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm yy = &xx_state[i]; 6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_deleted) 6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (j = 0; j < yy->ss_narcs; j++) { 6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz = &yy->ss_arc[j]; 6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addarc(d, yy->ss_rename, 6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm xx_state[zz->sa_arrow].ss_rename, 6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm zz->sa_label); 6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (yy->ss_finish) 6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addarc(d, yy->ss_rename, yy->ss_rename, 0); 6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d->d_initial = 0; 6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART FIVE -- GLUE IT ALL TOGETHER */ 6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic grammar * 6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmaketables(nfagrammar *gr) 6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfa *nf; 6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dfa *d; 6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm grammar *g; 6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (gr->gr_nnfas == 0) 6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return NULL; 6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = newgrammar(gr->gr_nfa[0]->nf_type); 6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm /* XXX first rule must be start rule */ 6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g->g_ll = gr->gr_ll; 6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; i < gr->gr_nnfas; i++) { 6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nf = gr->gr_nfa[i]; 6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (Py_DebugFlag) { 6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Dump of NFA for '%s' ...\n", nf->nf_name); 6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dumpnfa(&gr->gr_ll, nf); 6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("Making DFA for '%s' ...\n", nf->nf_name); 6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d = adddfa(g, nf->nf_type, nf->nf_name); 6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm makedfa(gr, gr->gr_nfa[i], d); 6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return g; 6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmgrammar * 6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmpgen(node *n) 6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nfagrammar *gr; 6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm grammar *g; 6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm gr = metacompile(n); 6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g = maketables(gr); 6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm translatelabels(g); 6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm addfirstsets(g); 6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyObject_FREE(gr); 6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return g; 6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmgrammar * 6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPy_pgen(node *n) 6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm return pgen(n); 6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* 6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDescription 6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm----------- 6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmInput is a grammar in extended BNF (using * for repetition, + for 6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmat-least-once repetition, [] for optional parts, | for alternatives and 6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm() for grouping). This has already been parsed and turned into a parse 6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtree. 6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmEach rule is considered as a regular expression in its own right. 6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmIt is turned into a Non-deterministic Finite Automaton (NFA), which 6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmis then turned into a Deterministic Finite Automaton (DFA), which is then 6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmoptimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, 6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmor similar compiler books (this technique is more often used for lexical 6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmanalyzers). 6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA's are used by the parser as parsing tables in a special way 6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthat's probably unique. Before they are usable, the FIRST sets of all 6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnon-terminals are computed. 7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmReference 7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm--------- 7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm[Aho&Ullman 77] 7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm (first edition) 7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/ 709