14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser accelerator module */
34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* The parser as originally conceived had disappointing performance.
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   This module does some precomputation that speeds up the selection
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   of a DFA based upon a token, turning a search through an array
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   into a simple indexing operation.  The parser now cannot work
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   without the accelerators installed.  Note that the accelerators
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   are installed dynamically when the parser is initialized, they
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   are not part of the static data structure written on graminit.[ch]
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm   by the parser generator. */
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h"
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h"
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h"
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h"
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "parser.h"
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward references */
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void fixdfa(grammar *, dfa *);
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void fixstate(grammar *, state *);
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyGrammar_AddAccelerators(grammar *g)
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    dfa *d;
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    d = g->g_dfa;
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = g->g_ndfas; --i >= 0; d++)
304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixdfa(g, d);
314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    g->g_accel = 1;
324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid
354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyGrammar_RemoveAccelerators(grammar *g)
364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    dfa *d;
384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int i;
394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    g->g_accel = 0;
404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    d = g->g_dfa;
414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (i = g->g_ndfas; --i >= 0; d++) {
424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        state *s;
434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        int j;
444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s = d->d_state;
454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (j = 0; j < d->d_nstates; j++, s++) {
464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (s->s_accel)
474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                PyObject_FREE(s->s_accel);
484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            s->s_accel = NULL;
494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfixdfa(grammar *g, dfa *d)
554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    state *s;
574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int j;
584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s = d->d_state;
594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (j = 0; j < d->d_nstates; j++, s++)
604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fixstate(g, s);
614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void
644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfixstate(grammar *g, state *s)
654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{
664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    arc *a;
674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int k;
684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int *accel;
694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    int nl = g->g_ll.ll_nlabels;
704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    s->s_accept = 0;
714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    accel = (int *) PyObject_MALLOC(nl * sizeof(int));
724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (accel == NULL) {
734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        fprintf(stderr, "no mem to build parser accelerators\n");
744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        exit(1);
754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (k = 0; k < nl; k++)
774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        accel[k] = -1;
784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    a = s->s_arc;
794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (k = s->s_narcs; --k >= 0; a++) {
804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        int lbl = a->a_lbl;
814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        label *l = &g->g_ll.ll_label[lbl];
824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        int type = l->lb_type;
834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (a->a_arrow >= (1 << 7)) {
844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            printf("XXX too many states!\n");
854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            continue;
864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (ISNONTERMINAL(type)) {
884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            dfa *d1 = PyGrammar_FindDFA(g, type);
894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            int ibit;
904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            if (type - NT_OFFSET >= (1 << 7)) {
914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                printf("XXX too high nonterminal number!\n");
924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                continue;
934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {
954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                if (testbit(d1->d_first, ibit)) {
964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    if (accel[ibit] != -1)
974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        printf("XXX ambiguity!\n");
984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                    accel[ibit] = a->a_arrow | (1 << 7) |
994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                        ((type - NT_OFFSET) << 8);
1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm                }
1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            }
1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else if (lbl == EMPTY)
1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            s->s_accept = 1;
1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else if (lbl >= 0 && lbl < nl)
1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            accel[lbl] = a->a_arrow;
1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    while (nl > 0 && accel[nl-1] == -1)
1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        nl--;
1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    for (k = 0; k < nl && accel[k] == -1;)
1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        k++;
1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    if (k < nl) {
1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        int i;
1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));
1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if (s->s_accel == NULL) {
1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            fprintf(stderr, "no mem to add parser accelerators\n");
1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            exit(1);
1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        }
1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s->s_lower = k;
1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        s->s_upper = nl;
1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        for (i = 0; k < nl; i++, k++)
1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            s->s_accel[i] = accel[k];
1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    }
1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    PyObject_FREE(accel);
1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm}
126