14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser generator */
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* For a description, see the comments at end of this file */
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "Python.h"
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h"
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h"
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h"
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h"
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "metagrammar.h"
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgen.h"
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_DebugFlag;
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmextern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfaarc {
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ar_label;
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ar_arrow;
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfaarc;
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfastate {
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         st_narcs;
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfaarc      *st_arc;
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfastate;
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfa {
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int                 nf_type;
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    char                *nf_name;
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int                 nf_nstates;
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfastate            *nf_state;
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int                 nf_start, nf_finish;
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfa;
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_rhs(labellist *ll,
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        nfa *nf, node *n, int *pa, int *pb);
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_alt(labellist *ll,
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        nfa *nf, node *n, int *pa, int *pb);
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_item(labellist *ll,
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         nfa *nf, node *n, int *pa, int *pb);
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_atom(labellist *ll,
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                         nfa *nf, node *n, int *pa, int *pb);
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfastate(nfa *nf)
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfastate *st;
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                sizeof(nfastate) * (nf->nf_nstates + 1));
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (nf->nf_state == NULL)
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("out of mem");
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st = &nf->nf_state[nf->nf_nstates++];
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st->st_narcs = 0;
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st->st_arc = NULL;
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return st - nf->nf_state;
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfaarc(nfa *nf, int from, int to, int lbl)
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfastate *st;
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfaarc *ar;
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st = &nf->nf_state[from];
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                  sizeof(nfaarc) * (st->st_narcs + 1));
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (st->st_arc == NULL)
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("out of mem");
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ar = &st->st_arc[st->st_narcs++];
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ar->ar_label = lbl;
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ar->ar_arrow = to;
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfa *
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnewnfa(char *name)
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfa *nf;
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    static int type = NT_OFFSET; /* All types will be disjunct */
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (nf == NULL)
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("no mem for new nfa");
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_type = type++;
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_name = name; /* XXX strdup(name) ??? */
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_nstates = 0;
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_state = NULL;
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf->nf_start = nf->nf_finish = -1;
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return nf;
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _nfagrammar {
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int                 gr_nnfas;
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfa                 **gr_nfa;
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    labellist           gr_ll;
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} nfagrammar;
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void compile_rule(nfagrammar *gr, node *n);
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfagrammar *
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnewnfagrammar(void)
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfagrammar *gr;
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (gr == NULL)
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("no mem for new nfa grammar");
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_nnfas = 0;
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_nfa = NULL;
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_ll.ll_nlabels = 0;
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_ll.ll_label = NULL;
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return gr;
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfa *
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddnfa(nfagrammar *gr, char *name)
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfa *nf;
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf = newnfa(name);
1264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
1274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                  sizeof(nfa*) * (gr->gr_nnfas + 1));
1284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (gr->gr_nfa == NULL)
1294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("out of mem");
1304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr->gr_nfa[gr->gr_nnfas++] = nf;
1314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addlabel(&gr->gr_ll, NAME, nf->nf_name);
1324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return nf;
1334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#ifdef Py_DEBUG
1364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic char REQNFMT[] = "metacompile: less than %d children\n";
1384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define REQN(i, count) \
1404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (i < count) { \
1414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fprintf(stderr, REQNFMT, count); \
1424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("REQN"); \
1434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    } else
1444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#else
1464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#define REQN(i, count)  /* empty */
1474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#endif
1484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic nfagrammar *
1504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmetacompile(node *n)
1514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfagrammar *gr;
1534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
1544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (Py_DebugFlag)
1564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("Compiling (meta-) parse tree into NFA grammar\n");
1574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr = newnfagrammar();
1584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, MSTART);
1594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = n->n_nchildren - 1; /* Last child is ENDMARKER */
1604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
1614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (; --i >= 0; n++) {
1624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (n->n_type != NEWLINE)
1634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            compile_rule(gr, n);
1644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
1654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return gr;
1664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
1694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_rule(nfagrammar *gr, node *n)
1704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfa *nf;
1724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, RULE);
1744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQN(n->n_nchildren, 4);
1754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
1764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, NAME);
1774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nf = addnfa(gr, n->n_str);
1784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n++;
1794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, COLON);
1804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n++;
1814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, RHS);
1824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
1834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n++;
1844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, NEWLINE);
1854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
1864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
1884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
1894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
1904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
1914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int a, b;
1924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
1934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, RHS);
1944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = n->n_nchildren;
1954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQN(i, 1);
1964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
1974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, ALT);
1984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    compile_alt(ll, nf, n, pa, pb);
1994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (--i <= 0)
2004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return;
2014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n++;
2024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    a = *pa;
2034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    b = *pb;
2044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    *pa = addnfastate(nf);
2054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    *pb = addnfastate(nf);
2064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addnfaarc(nf, *pa, a, EMPTY);
2074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addnfaarc(nf, b, *pb, EMPTY);
2084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (; --i >= 0; n++) {
2094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, VBAR);
2104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQN(i, 1);
2114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        --i;
2124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, ALT);
2144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        compile_alt(ll, nf, n, &a, &b);
2154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pa, a, EMPTY);
2164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, b, *pb, EMPTY);
2174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
2194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
2214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
2224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
2234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
2244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int a, b;
2254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, ALT);
2274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = n->n_nchildren;
2284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQN(i, 1);
2294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
2304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, ITEM);
2314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    compile_item(ll, nf, n, pa, pb);
2324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    --i;
2334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n++;
2344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (; --i >= 0; n++) {
2354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, ITEM);
2364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        compile_item(ll, nf, n, &a, &b);
2374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pb, a, EMPTY);
2384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        *pb = b;
2394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
2414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
2434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
2444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
2454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
2464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int a, b;
2474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, ITEM);
2494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = n->n_nchildren;
2504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQN(i, 1);
2514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
2524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (n->n_type == LSQB) {
2534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQN(i, 3);
2544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, RHS);
2564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        *pa = addnfastate(nf);
2574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        *pb = addnfastate(nf);
2584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pa, *pb, EMPTY);
2594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        compile_rhs(ll, nf, n, &a, &b);
2604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pa, a, EMPTY);
2614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, b, *pb, EMPTY);
2624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQN(i, 1);
2634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, RSQB);
2654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else {
2674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        compile_atom(ll, nf, n, pa, pb);
2684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (--i <= 0)
2694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return;
2704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pb, *pa, EMPTY);
2724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (n->n_type == STAR)
2734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            *pb = *pa;
2744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else
2754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            REQ(n, PLUS);
2764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
2784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
2804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmcompile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
2814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
2824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
2834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
2844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQ(n, ATOM);
2854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    i = n->n_nchildren;
2864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    REQN(i, 1);
2874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    n = n->n_child;
2884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (n->n_type == LPAR) {
2894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQN(i, 3);
2904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, RHS);
2924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        compile_rhs(ll, nf, n, pa, pb);
2934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        n++;
2944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, RPAR);
2954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
2964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else if (n->n_type == NAME || n->n_type == STRING) {
2974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        *pa = addnfastate(nf);
2984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        *pb = addnfastate(nf);
2994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
3004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    else
3024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        REQ(n, NAME);
3034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
3064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumpstate(labellist *ll, nfa *nf, int istate)
3074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfastate *st;
3094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
3104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfaarc *ar;
3114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("%c%2d%c",
3134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        istate == nf->nf_start ? '*' : ' ',
3144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        istate,
3154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        istate == nf->nf_finish ? '.' : ' ');
3164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    st = &nf->nf_state[istate];
3174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ar = st->st_arc;
3184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < st->st_narcs; i++) {
3194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (i > 0)
3204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("\n    ");
3214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("-> %2d  %s", ar->ar_arrow,
3224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
3234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ar++;
3244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("\n");
3264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
3294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmdumpnfa(labellist *ll, nfa *nf)
3304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
3324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("NFA '%s' has %d states; start %d, finish %d\n",
3344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
3354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < nf->nf_nstates; i++)
3364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        dumpstate(ll, nf, i);
3374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */
3414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
3434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmaddclosure(bitset ss, nfa *nf, int istate)
3444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (addbit(ss, istate)) {
3464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nfastate *st = &nf->nf_state[istate];
3474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nfaarc *ar = st->st_arc;
3484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        int i;
3494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = st->st_narcs; --i >= 0; ) {
3514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (ar->ar_label == EMPTY)
3524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                addclosure(ss, nf, ar->ar_arrow);
3534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            ar++;
3544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
3554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
3564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
3574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_arc {
3594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    bitset      sa_bitset;
3604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         sa_arrow;
3614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         sa_label;
3624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_arc;
3634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_state {
3654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    bitset      ss_ss;
3664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ss_narcs;
3674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    struct _ss_arc      *ss_arc;
3684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ss_deleted;
3694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ss_finish;
3704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         ss_rename;
3714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_state;
3724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtypedef struct _ss_dfa {
3744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int         sd_nstates;
3754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_state *sd_state;
3764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} ss_dfa;
3774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward */
3794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
3804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                       labellist *ll, char *msg);
3814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void simplify(int xx_nstates, ss_state *xx_state);
3824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void convert(dfa *d, int xx_nstates, ss_state *xx_state);
3834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
3854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmakedfa(nfagrammar *gr, nfa *nf, dfa *d)
3864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
3874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int nbits = nf->nf_nstates;
3884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    bitset ss;
3894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int xx_nstates;
3904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_state *xx_state, *yy;
3914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_arc *zz;
3924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int istate, jstate, iarc, jarc, ibit;
3934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfastate *st;
3944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfaarc *ar;
3954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
3964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss = newbitset(nbits);
3974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addclosure(ss, nf, nf->nf_start);
3984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
3994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (xx_state == NULL)
4004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        Py_FatalError("no mem for xx_state in makedfa");
4014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    xx_nstates = 1;
4024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy = &xx_state[0];
4034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy->ss_ss = ss;
4044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy->ss_narcs = 0;
4054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy->ss_arc = NULL;
4064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy->ss_deleted = 0;
4074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    yy->ss_finish = testbit(ss, nf->nf_finish);
4084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (yy->ss_finish)
4094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("Error: nonterminal '%s' may produce empty.\n",
4104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            nf->nf_name);
4114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* This algorithm is from a book written before
4134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm       the invention of structured programming... */
4144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* For each unmarked state... */
4164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (istate = 0; istate < xx_nstates; ++istate) {
4174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        size_t size;
4184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yy = &xx_state[istate];
4194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        ss = yy->ss_ss;
4204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        /* For all its states... */
4214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
4224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (!testbit(ss, ibit))
4234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue;
4244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            st = &nf->nf_state[ibit];
4254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            /* For all non-empty arcs from this state... */
4264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for (iarc = 0; iarc < st->st_narcs; iarc++) {
4274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                ar = &st->st_arc[iarc];
4284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (ar->ar_label == EMPTY)
4294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue;
4304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Look up in list of arcs from this state */
4314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
4324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    zz = &yy->ss_arc[jarc];
4334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (ar->ar_label == zz->sa_label)
4344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        goto found;
4354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
4364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Add new arc for this state */
4374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                size = sizeof(ss_arc) * (yy->ss_narcs + 1);
4384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                yy->ss_arc = (ss_arc *)PyObject_REALLOC(
4394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                            yy->ss_arc, size);
4404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (yy->ss_arc == NULL)
4414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    Py_FatalError("out of mem");
4424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz = &yy->ss_arc[yy->ss_narcs++];
4434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz->sa_label = ar->ar_label;
4444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz->sa_bitset = newbitset(nbits);
4454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz->sa_arrow = -1;
4464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm             found:             ;
4474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                /* Add destination */
4484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                addclosure(zz->sa_bitset, nf, ar->ar_arrow);
4494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
4504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
4514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        /* Now look up all the arrow states */
4524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
4534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            zz = &xx_state[istate].ss_arc[jarc];
4544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for (jstate = 0; jstate < xx_nstates; jstate++) {
4554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (samebitset(zz->sa_bitset,
4564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    xx_state[jstate].ss_ss, nbits)) {
4574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    zz->sa_arrow = jstate;
4584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    goto done;
4594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
4604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
4614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            size = sizeof(ss_state) * (xx_nstates + 1);
4624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            xx_state = (ss_state *)PyObject_REALLOC(xx_state,
4634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                                        size);
4644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (xx_state == NULL)
4654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                Py_FatalError("out of mem");
4664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            zz->sa_arrow = xx_nstates;
4674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy = &xx_state[xx_nstates++];
4684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy->ss_ss = zz->sa_bitset;
4694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy->ss_narcs = 0;
4704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy->ss_arc = NULL;
4714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy->ss_deleted = 0;
4724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
4734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm         done:          ;
4744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
4754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
4764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (Py_DebugFlag)
4784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
4794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                        "before minimizing");
4804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    simplify(xx_nstates, xx_state);
4824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (Py_DebugFlag)
4844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
4854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                        "after minimizing");
4864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    convert(d, xx_nstates, xx_state);
4884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    /* XXX cleanup */
4904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyObject_FREE(xx_state);
4914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
4924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
4934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
4944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmprintssdfa(int xx_nstates, ss_state *xx_state, int nbits,
4954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm           labellist *ll, char *msg)
4964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
4974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i, ibit, iarc;
4984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_state *yy;
4994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_arc *zz;
5004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    printf("Subset DFA %s\n", msg);
5024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < xx_nstates; i++) {
5034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yy = &xx_state[i];
5044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (yy->ss_deleted)
5054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
5064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf(" Subset %d", i);
5074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (yy->ss_finish)
5084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf(" (finish)");
5094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf(" { ");
5104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (ibit = 0; ibit < nbits; ibit++) {
5114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (testbit(yy->ss_ss, ibit))
5124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                printf("%d ", ibit);
5134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
5144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("}\n");
5154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
5164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            zz = &yy->ss_arc[iarc];
5174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("  Arc to state %d, label %s\n",
5184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz->sa_arrow,
5194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                PyGrammar_LabelRepr(
5204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    &ll->ll_label[zz->sa_label]));
5214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
5224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
5234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
5244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART THREE -- SIMPLIFY DFA */
5274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Simplify the DFA by repeatedly eliminating states that are
5294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   equivalent to another oner.  This is NOT Algorithm 3.3 from
5304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   [Aho&Ullman 77].  It does not always finds the minimal DFA,
5314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   but it does usually make a much smaller one...  (For an example
5324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   of sub-optimal behavior, try S: x a b+ | y a b+.)
5334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/
5344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic int
5364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsamestate(ss_state *s1, ss_state *s2)
5374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
5384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
5394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
5414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return 0;
5424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < s1->ss_narcs; i++) {
5434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
5444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
5454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            return 0;
5464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
5474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return 1;
5484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
5494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
5514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmrenamestates(int xx_nstates, ss_state *xx_state, int from, int to)
5524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
5534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i, j;
5544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (Py_DebugFlag)
5564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        printf("Rename state %d to %d.\n", from, to);
5574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < xx_nstates; i++) {
5584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (xx_state[i].ss_deleted)
5594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
5604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (j = 0; j < xx_state[i].ss_narcs; j++) {
5614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (xx_state[i].ss_arc[j].sa_arrow == from)
5624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                xx_state[i].ss_arc[j].sa_arrow = to;
5634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
5644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
5654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
5664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
5684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmsimplify(int xx_nstates, ss_state *xx_state)
5694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
5704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int changes;
5714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i, j;
5724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    do {
5744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        changes = 0;
5754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = 1; i < xx_nstates; i++) {
5764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (xx_state[i].ss_deleted)
5774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue;
5784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for (j = 0; j < i; j++) {
5794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (xx_state[j].ss_deleted)
5804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    continue;
5814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (samestate(&xx_state[i], &xx_state[j])) {
5824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    xx_state[i].ss_deleted++;
5834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    renamestates(xx_nstates, xx_state,
5844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                                 i, j);
5854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    changes++;
5864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    break;
5874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
5884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
5894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
5904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    } while (changes);
5914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
5924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART FOUR -- GENERATE PARSING TABLES */
5954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Convert the DFA into a grammar that can be used by our parser */
5974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
5984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
5994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmconvert(dfa *d, int xx_nstates, ss_state *xx_state)
6004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
6014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i, j;
6024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_state *yy;
6034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    ss_arc *zz;
6044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < xx_nstates; i++) {
6064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yy = &xx_state[i];
6074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (yy->ss_deleted)
6084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
6094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yy->ss_rename = addstate(d);
6104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
6114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < xx_nstates; i++) {
6134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        yy = &xx_state[i];
6144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (yy->ss_deleted)
6154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
6164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (j = 0; j < yy->ss_narcs; j++) {
6174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            zz = &yy->ss_arc[j];
6184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            addarc(d, yy->ss_rename,
6194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                xx_state[zz->sa_arrow].ss_rename,
6204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                zz->sa_label);
6214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
6224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (yy->ss_finish)
6234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            addarc(d, yy->ss_rename, yy->ss_rename, 0);
6244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
6254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    d->d_initial = 0;
6274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
6284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* PART FIVE -- GLUE IT ALL TOGETHER */
6314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic grammar *
6334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmmaketables(nfagrammar *gr)
6344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
6354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
6364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfa *nf;
6374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    dfa *d;
6384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    grammar *g;
6394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (gr->gr_nnfas == 0)
6414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return NULL;
6424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    g = newgrammar(gr->gr_nfa[0]->nf_type);
6434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    /* XXX first rule must be start rule */
6444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    g->g_ll = gr->gr_ll;
6454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = 0; i < gr->gr_nnfas; i++) {
6474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nf = gr->gr_nfa[i];
6484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (Py_DebugFlag) {
6494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("Dump of NFA for '%s' ...\n", nf->nf_name);
6504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            dumpnfa(&gr->gr_ll, nf);
6514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("Making DFA for '%s' ...\n", nf->nf_name);
6524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
6534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        d = adddfa(g, nf->nf_type, nf->nf_name);
6544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        makedfa(gr, gr->gr_nfa[i], d);
6554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
6564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return g;
6584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
6594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmgrammar *
6614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmpgen(node *n)
6624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
6634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    nfagrammar *gr;
6644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    grammar *g;
6654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    gr = metacompile(n);
6674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    g = maketables(gr);
6684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    translatelabels(g);
6694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    addfirstsets(g);
6704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyObject_FREE(gr);
6714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    return g;
6724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
6734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmgrammar *
6754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPy_pgen(node *n)
6764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
6774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm  return pgen(n);
6784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
6794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/*
6814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmDescription
6834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm-----------
6844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmInput is a grammar in extended BNF (using * for repetition, + for
6864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmat-least-once repetition, [] for optional parts, | for alternatives and
6874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm() for grouping).  This has already been parsed and turned into a parse
6884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmtree.
6894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmEach rule is considered as a regular expression in its own right.
6914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmIt is turned into a Non-deterministic Finite Automaton (NFA), which
6924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmis then turned into a Deterministic Finite Automaton (DFA), which is then
6934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmoptimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
6944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmor similar compiler books (this technique is more often used for lexical
6954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmanalyzers).
6964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
6974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmThe DFA's are used by the parser as parsing tables in a special way
6984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmthat's probably unique.  Before they are usable, the FIRST sets of all
6994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmnon-terminals are computed.
7004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmReference
7024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm---------
7034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm[Aho&Ullman 77]
7054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
7064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    (first edition)
7074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
7084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm*/
709