14710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 24710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Parser accelerator module */ 34710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* The parser as originally conceived had disappointing performance. 54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm This module does some precomputation that speeds up the selection 64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm of a DFA based upon a token, turning a search through an array 74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm into a simple indexing operation. The parser now cannot work 84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm without the accelerators installed. Note that the accelerators 94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm are installed dynamically when the parser is initialized, they 104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm are not part of the static data structure written on graminit.[ch] 114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm by the parser generator. */ 124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "pgenheaders.h" 144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "grammar.h" 154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "node.h" 164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "token.h" 174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm#include "parser.h" 184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm/* Forward references */ 204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void fixdfa(grammar *, dfa *); 214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void fixstate(grammar *, state *); 224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyGrammar_AddAccelerators(grammar *g) 254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dfa *d; 274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d = g->g_dfa; 294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = g->g_ndfas; --i >= 0; d++) 304710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixdfa(g, d); 314710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g->g_accel = 1; 324710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 334710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 344710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmvoid 354710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmPyGrammar_RemoveAccelerators(grammar *g) 364710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 374710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dfa *d; 384710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 394710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm g->g_accel = 0; 404710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm d = g->g_dfa; 414710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = g->g_ndfas; --i >= 0; d++) { 424710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm state *s; 434710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int j; 444710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s = d->d_state; 454710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (j = 0; j < d->d_nstates; j++, s++) { 464710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_accel) 474710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyObject_FREE(s->s_accel); 484710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accel = NULL; 494710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 504710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 514710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 524710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 534710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 544710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfixdfa(grammar *g, dfa *d) 554710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 564710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm state *s; 574710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int j; 584710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s = d->d_state; 594710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (j = 0; j < d->d_nstates; j++, s++) 604710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fixstate(g, s); 614710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 624710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm 634710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmstatic void 644710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmfixstate(grammar *g, state *s) 654710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm{ 664710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm arc *a; 674710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int k; 684710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int *accel; 694710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int nl = g->g_ll.ll_nlabels; 704710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accept = 0; 714710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm accel = (int *) PyObject_MALLOC(nl * sizeof(int)); 724710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (accel == NULL) { 734710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fprintf(stderr, "no mem to build parser accelerators\n"); 744710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm exit(1); 754710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 764710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (k = 0; k < nl; k++) 774710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm accel[k] = -1; 784710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm a = s->s_arc; 794710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (k = s->s_narcs; --k >= 0; a++) { 804710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int lbl = a->a_lbl; 814710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm label *l = &g->g_ll.ll_label[lbl]; 824710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int type = l->lb_type; 834710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (a->a_arrow >= (1 << 7)) { 844710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("XXX too many states!\n"); 854710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 864710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 874710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (ISNONTERMINAL(type)) { 884710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm dfa *d1 = PyGrammar_FindDFA(g, type); 894710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int ibit; 904710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (type - NT_OFFSET >= (1 << 7)) { 914710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("XXX too high nonterminal number!\n"); 924710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm continue; 934710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 944710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { 954710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (testbit(d1->d_first, ibit)) { 964710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (accel[ibit] != -1) 974710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm printf("XXX ambiguity!\n"); 984710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm accel[ibit] = a->a_arrow | (1 << 7) | 994710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm ((type - NT_OFFSET) << 8); 1004710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1014710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1024710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1034710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else if (lbl == EMPTY) 1044710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accept = 1; 1054710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm else if (lbl >= 0 && lbl < nl) 1064710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm accel[lbl] = a->a_arrow; 1074710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1084710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm while (nl > 0 && accel[nl-1] == -1) 1094710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm nl--; 1104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (k = 0; k < nl && accel[k] == -1;) 1114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm k++; 1124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (k < nl) { 1134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm int i; 1144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); 1154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm if (s->s_accel == NULL) { 1164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm fprintf(stderr, "no mem to add parser accelerators\n"); 1174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm exit(1); 1184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_lower = k; 1204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_upper = nl; 1214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm for (i = 0; k < nl; i++, k++) 1224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm s->s_accel[i] = accel[k]; 1234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm } 1244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm PyObject_FREE(accel); 1254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm} 126