1/* Compute lookahead criteria for Bison.
2
3   Copyright (C) 1984, 1986, 1989, 2000-2012 Free Software Foundation,
4   Inc.
5
6   This file is part of Bison, the GNU Compiler Compiler.
7
8   This program is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20
21
22/* Find which rules need lookahead in each state, and which lookahead
23   tokens they accept.  */
24
25#include <config.h>
26#include "system.h"
27
28#include <bitset.h>
29#include <bitsetv.h>
30
31#include "LR0.h"
32#include "complain.h"
33#include "derives.h"
34#include "getargs.h"
35#include "gram.h"
36#include "lalr.h"
37#include "muscle-tab.h"
38#include "nullable.h"
39#include "reader.h"
40#include "relation.h"
41#include "symtab.h"
42
43goto_number *goto_map;
44goto_number ngotos;
45state_number *from_state;
46state_number *to_state;
47bitsetv goto_follows = NULL;
48
49/* Linked list of goto numbers.  */
50typedef struct goto_list
51{
52  struct goto_list *next;
53  goto_number value;
54} goto_list;
55
56
57/* LA is an NLA by NTOKENS matrix of bits.  LA[l, i] is 1 if the rule
58   LArule[l] is applicable in the appropriate state when the next
59   token is symbol i.  If LA[l, i] and LA[l, j] are both 1 for i != j,
60   it is a conflict.  */
61
62static bitsetv LA = NULL;
63size_t nLA;
64
65
66static goto_number **includes;
67static goto_list **lookback;
68
69
70
71
72void
73set_goto_map (void)
74{
75  state_number s;
76  goto_number *temp_map;
77
78  goto_map = xcalloc (nvars + 1, sizeof *goto_map);
79  temp_map = xnmalloc (nvars + 1, sizeof *temp_map);
80
81  ngotos = 0;
82  for (s = 0; s < nstates; ++s)
83    {
84      transitions *sp = states[s]->transitions;
85      int i;
86      for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
87	{
88	  ngotos++;
89
90	  /* Abort if (ngotos + 1) would overflow.  */
91	  aver (ngotos != GOTO_NUMBER_MAXIMUM);
92
93	  goto_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
94	}
95    }
96
97  {
98    goto_number k = 0;
99    int i;
100    for (i = ntokens; i < nsyms; i++)
101      {
102	temp_map[i - ntokens] = k;
103	k += goto_map[i - ntokens];
104      }
105
106    for (i = ntokens; i < nsyms; i++)
107      goto_map[i - ntokens] = temp_map[i - ntokens];
108
109    goto_map[nsyms - ntokens] = ngotos;
110    temp_map[nsyms - ntokens] = ngotos;
111  }
112
113  from_state = xcalloc (ngotos, sizeof *from_state);
114  to_state = xcalloc (ngotos, sizeof *to_state);
115
116  for (s = 0; s < nstates; ++s)
117    {
118      transitions *sp = states[s]->transitions;
119      int i;
120      for (i = sp->num - 1; i >= 0 && TRANSITION_IS_GOTO (sp, i); --i)
121	{
122	  goto_number k = temp_map[TRANSITION_SYMBOL (sp, i) - ntokens]++;
123	  from_state[k] = s;
124	  to_state[k] = sp->states[i]->number;
125	}
126    }
127
128  free (temp_map);
129}
130
131
132goto_number
133map_goto (state_number s0, symbol_number sym)
134{
135  goto_number high;
136  goto_number low;
137  goto_number middle;
138  state_number s;
139
140  low = goto_map[sym - ntokens];
141  high = goto_map[sym - ntokens + 1] - 1;
142
143  for (;;)
144    {
145      aver (low <= high);
146      middle = (low + high) / 2;
147      s = from_state[middle];
148      if (s == s0)
149	return middle;
150      else if (s < s0)
151	low = middle + 1;
152      else
153	high = middle - 1;
154    }
155}
156
157
158static void
159initialize_F (void)
160{
161  goto_number **reads = xnmalloc (ngotos, sizeof *reads);
162  goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
163  goto_number nedges = 0;
164
165  goto_number i;
166
167  goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
168
169  for (i = 0; i < ngotos; i++)
170    {
171      state_number stateno = to_state[i];
172      transitions *sp = states[stateno]->transitions;
173
174      int j;
175      FOR_EACH_SHIFT (sp, j)
176	bitset_set (goto_follows[i], TRANSITION_SYMBOL (sp, j));
177
178      for (; j < sp->num; j++)
179	{
180	  symbol_number sym = TRANSITION_SYMBOL (sp, j);
181	  if (nullable[sym - ntokens])
182	    edge[nedges++] = map_goto (stateno, sym);
183	}
184
185      if (nedges == 0)
186	reads[i] = NULL;
187      else
188	{
189	  reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
190	  memcpy (reads[i], edge, nedges * sizeof edge[0]);
191	  reads[i][nedges] = END_NODE;
192	  nedges = 0;
193	}
194    }
195
196  relation_digraph (reads, ngotos, &goto_follows);
197
198  for (i = 0; i < ngotos; i++)
199    free (reads[i]);
200
201  free (reads);
202  free (edge);
203}
204
205
206static void
207add_lookback_edge (state *s, rule *r, goto_number gotono)
208{
209  int ri = state_reduction_find (s, r);
210  goto_list *sp = xmalloc (sizeof *sp);
211  sp->next = lookback[(s->reductions->lookahead_tokens - LA) + ri];
212  sp->value = gotono;
213  lookback[(s->reductions->lookahead_tokens - LA) + ri] = sp;
214}
215
216
217
218static void
219build_relations (void)
220{
221  goto_number *edge = xnmalloc (ngotos + 1, sizeof *edge);
222  state_number *states1 = xnmalloc (ritem_longest_rhs () + 1, sizeof *states1);
223  goto_number i;
224
225  includes = xnmalloc (ngotos, sizeof *includes);
226
227  for (i = 0; i < ngotos; i++)
228    {
229      int nedges = 0;
230      symbol_number symbol1 = states[to_state[i]]->accessing_symbol;
231      rule **rulep;
232
233      for (rulep = derives[symbol1 - ntokens]; *rulep; rulep++)
234	{
235	  bool done;
236	  int length = 1;
237	  item_number const *rp;
238	  state *s = states[from_state[i]];
239	  states1[0] = s->number;
240
241	  for (rp = (*rulep)->rhs; ! item_number_is_rule_number (*rp); rp++)
242	    {
243	      s = transitions_to (s->transitions,
244				  item_number_as_symbol_number (*rp));
245	      states1[length++] = s->number;
246	    }
247
248	  if (!s->consistent)
249	    add_lookback_edge (s, *rulep, i);
250
251	  length--;
252	  done = false;
253	  while (!done)
254	    {
255	      done = true;
256	      /* Each rhs ends in a rule number, and there is a
257		 sentinel (ritem[-1]=0) before the first rhs, so it is safe to
258		 decrement RP here.  */
259	      rp--;
260	      if (ISVAR (*rp))
261		{
262		  /* Downcasting from item_number to symbol_number.  */
263		  edge[nedges++] = map_goto (states1[--length],
264					     item_number_as_symbol_number (*rp));
265		  if (nullable[*rp - ntokens])
266		    done = false;
267		}
268	    }
269	}
270
271      if (nedges == 0)
272	includes[i] = NULL;
273      else
274	{
275	  int j;
276	  includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
277	  for (j = 0; j < nedges; j++)
278	    includes[i][j] = edge[j];
279	  includes[i][nedges] = END_NODE;
280	}
281    }
282
283  free (edge);
284  free (states1);
285
286  relation_transpose (&includes, ngotos);
287}
288
289
290
291static void
292compute_FOLLOWS (void)
293{
294  goto_number i;
295
296  relation_digraph (includes, ngotos, &goto_follows);
297
298  for (i = 0; i < ngotos; i++)
299    free (includes[i]);
300
301  free (includes);
302}
303
304
305static void
306compute_lookahead_tokens (void)
307{
308  size_t i;
309  goto_list *sp;
310
311  for (i = 0; i < nLA; i++)
312    for (sp = lookback[i]; sp; sp = sp->next)
313      bitset_or (LA[i], LA[i], goto_follows[sp->value]);
314
315  /* Free LOOKBACK. */
316  for (i = 0; i < nLA; i++)
317    LIST_FREE (goto_list, lookback[i]);
318
319  free (lookback);
320}
321
322
323/*----------------------------------------------------.
324| Count the number of lookahead tokens required for S |
325| (N_LOOKAHEAD_TOKENS member).                        |
326`----------------------------------------------------*/
327
328static int
329state_lookahead_tokens_count (state *s, bool default_reduction_only_for_accept)
330{
331  int n_lookahead_tokens = 0;
332  reductions *rp = s->reductions;
333  transitions *sp = s->transitions;
334
335  /* Transitions are only disabled during conflict resolution, and that
336     hasn't happened yet, so there should be no need to check that
337     transition 0 hasn't been disabled before checking if it is a shift.
338     However, this check was performed at one time, so we leave it as an
339     aver.  */
340  aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0));
341
342  /* We need a lookahead either to distinguish different reductions
343     (i.e., there are two or more), or to distinguish a reduction from a
344     shift.  Otherwise, it is straightforward, and the state is
345     `consistent'.  However, do not treat a state with any reductions as
346     consistent unless it is the accepting state (because there is never
347     a lookahead token that makes sense there, and so no lookahead token
348     should be read) if the user has otherwise disabled default
349     reductions.  */
350  if (rp->num > 1
351      || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))
352      || (rp->num == 1 && rp->rules[0]->number != 0
353          && default_reduction_only_for_accept))
354    n_lookahead_tokens += rp->num;
355  else
356    s->consistent = 1;
357
358  return n_lookahead_tokens;
359}
360
361
362/*----------------------------------------------------.
363| Compute LA, NLA, and the lookahead_tokens members.  |
364`----------------------------------------------------*/
365
366void
367initialize_LA (void)
368{
369  state_number i;
370  bitsetv pLA;
371  bool default_reduction_only_for_accept;
372  {
373    char *default_reductions =
374      muscle_percent_define_get ("lr.default-reductions");
375    default_reduction_only_for_accept =
376      0 == strcmp (default_reductions, "accepting");
377    free (default_reductions);
378  }
379
380  /* Compute the total number of reductions requiring a lookahead.  */
381  nLA = 0;
382  for (i = 0; i < nstates; i++)
383    nLA +=
384      state_lookahead_tokens_count (states[i],
385                                    default_reduction_only_for_accept);
386  /* Avoid having to special case 0.  */
387  if (!nLA)
388    nLA = 1;
389
390  pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
391
392  /* Initialize the members LOOKAHEAD_TOKENS for each state whose reductions
393     require lookahead tokens.  */
394  for (i = 0; i < nstates; i++)
395    {
396      int count =
397        state_lookahead_tokens_count (states[i],
398                                      default_reduction_only_for_accept);
399      if (count)
400	{
401	  states[i]->reductions->lookahead_tokens = pLA;
402	  pLA += count;
403	}
404    }
405}
406
407
408/*---------------------------------------------.
409| Output the lookahead tokens for each state.  |
410`---------------------------------------------*/
411
412static void
413lookahead_tokens_print (FILE *out)
414{
415  state_number i;
416  int j, k;
417  fprintf (out, "Lookahead tokens: BEGIN\n");
418  for (i = 0; i < nstates; ++i)
419    {
420      reductions *reds = states[i]->reductions;
421      bitset_iterator iter;
422      int n_lookahead_tokens = 0;
423
424      if (reds->lookahead_tokens)
425	for (k = 0; k < reds->num; ++k)
426	  if (reds->lookahead_tokens[k])
427	    ++n_lookahead_tokens;
428
429      fprintf (out, "State %d: %d lookahead tokens\n",
430	       i, n_lookahead_tokens);
431
432      if (reds->lookahead_tokens)
433	for (j = 0; j < reds->num; ++j)
434	  BITSET_FOR_EACH (iter, reds->lookahead_tokens[j], k, 0)
435	  {
436	    fprintf (out, "   on %d (%s) -> rule %d\n",
437		     k, symbols[k]->tag,
438		     reds->rules[j]->number);
439	  };
440    }
441  fprintf (out, "Lookahead tokens: END\n");
442}
443
444void
445lalr (void)
446{
447  initialize_LA ();
448  set_goto_map ();
449  initialize_F ();
450  lookback = xcalloc (nLA, sizeof *lookback);
451  build_relations ();
452  compute_FOLLOWS ();
453  compute_lookahead_tokens ();
454
455  if (trace_flag & trace_sets)
456    lookahead_tokens_print (stderr);
457}
458
459
460void
461lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
462{
463  goto_number ngotos_reachable = 0;
464  symbol_number nonterminal = 0;
465  aver (nsyms == nvars + ntokens);
466  {
467    goto_number i;
468    for (i = 0; i < ngotos; ++i)
469      {
470        while (i == goto_map[nonterminal])
471          goto_map[nonterminal++] = ngotos_reachable;
472        /* If old_to_new[from_state[i]] = nstates_old, remove this goto
473           entry.  */
474        if (old_to_new[from_state[i]] != nstates_old)
475          {
476            /* from_state[i] is not removed, so it and thus to_state[i] are
477               reachable, so to_state[i] != nstates_old.  */
478            aver (old_to_new[to_state[i]] != nstates_old);
479            from_state[ngotos_reachable] = old_to_new[from_state[i]];
480            to_state[ngotos_reachable] = old_to_new[to_state[i]];
481            ++ngotos_reachable;
482          }
483      }
484  }
485  while (nonterminal <= nvars)
486    {
487      aver (ngotos == goto_map[nonterminal]);
488      goto_map[nonterminal++] = ngotos_reachable;
489    }
490  ngotos = ngotos_reachable;
491}
492
493
494void
495lalr_free (void)
496{
497  state_number s;
498  for (s = 0; s < nstates; ++s)
499    states[s]->reductions->lookahead_tokens = NULL;
500  bitsetv_free (LA);
501}
502