/* IELR main implementation.

   Copyright (C) 2009-2012 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "system.h"

#include "ielr.h"

#include <bitset.h>
#include <timevar.h>

#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"

/** Records the value of the \%define variable lr.type.  */
typedef enum { LR_TYPE__LALR, LR_TYPE__IELR, LR_TYPE__CANONICAL_LR } LrType;

/**
 * \post:
 *   - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
 *     is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
 *     the same RHS are nullable nonterminals.  In other words, the follows of
 *     a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
 */
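/* Illustration (hypothetical grammar fragment): given

     stmt: "if" cond then_part opt_else;

   where then_part and opt_else are nullable nonterminals and cond is any
   nonterminal, the bits for the ritem positions of cond, then_part, and
   opt_else are all set: everything to the right of each of them in the RHS
   is a nullable nonterminal, so the item's lookahead set flows into the
   follows of a goto on each of them.  The bit for "if" stays clear because
   it is a token.  */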
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
  bitset result = bitset_create (nritems, BITSET_FIXED);
  unsigned int i = nritems - 1;
  while (i > 0)
    {
      --i;
      while (!item_number_is_rule_number (ritem[i])
             && ISVAR (ritem[i])
             && nullable[item_number_as_symbol_number (ritem[i]) - ntokens])
        bitset_set (result, i--);
      if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
        bitset_set (result, i--);
      while (!item_number_is_rule_number (ritem[i]) && i > 0)
        --i;
    }
  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "ritem_sees_lookahead_set:\n");
      debug_bitset (result);
      fprintf (stderr, "\n");
    }
  return result;
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 * \post:
 *   - Each of \c *edgesp and \c *edge_countsp is a new array of size
 *     \c ::ngotos.
 *   - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
 *     <tt>(*edge_countsp)[i]+1</tt>.
 *   - In such a \c goto_number array, the last element is \c ::END_NODE.
 *   - All remaining elements are the indices of the gotos to which there is an
 *     internal follow edge from goto \c i.
 *   - There is an internal follow edge from goto \c i to goto \c j iff both:
 *     - The from states of gotos \c i and \c j are the same.
 *     - The transition nonterminal for goto \c i appears as the first RHS
 *       symbol of at least one production for which both:
 *       - The LHS is the transition symbol of goto \c j.
 *       - All other RHS symbols are nullable nonterminals.
 *     - In other words, the follows of goto \c i include the follows of
 *       goto \c j, and it's an internal edge because the from states are the
 *       same.
 */
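/* Illustration (hypothetical): suppose some state has gotos i on A and j
   on B, and the grammar contains

     B: A opt_c;

   with opt_c a nullable nonterminal.  Then there is an internal follow
   edge from goto i to goto j: any token in the follows of the goto on B
   also belongs in the follows of the goto on A, and the edge is internal
   because both gotos leave the same state.  */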
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
                                    goto_number ***edgesp, int **edge_countsp)
{
  *edgesp = xnmalloc (ngotos, sizeof **edgesp);
  *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
  {
    bitset sources = bitset_create (ngotos, BITSET_FIXED);
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      (*edge_countsp)[i] = 0;
    for (i = 0; i < ngotos; ++i)
      {
        int nsources = 0;
        {
          rule **rulep;
          for (rulep = derives[states[to_state[i]]->accessing_symbol
                               - ntokens];
               *rulep;
               ++rulep)
            {
              /* If there is at least one RHS symbol, if the first RHS symbol
                 is a nonterminal, and if all remaining RHS symbols (if any)
                 are nullable nonterminals, create an edge from the LHS
                 symbol's goto to the first RHS symbol's goto such that the RHS
                 symbol's goto will be the source of the edge after the
                 eventual relation_transpose below.

                 Unlike in ielr_compute_always_follows, I use a bitset for
                 edges rather than an array because it is possible that
                 multiple RHS's with the same first symbol could fit and thus
                 that we could end up with redundant edges.  With the
                 possibility of redundant edges, it's hard to know ahead of
                 time how large to make such an array.  Another possible
                 redundancy is that source and destination might be the same
                 goto.  Eliminating all these possible redundancies now might
                 possibly help performance a little.  I have not proven any of
                 this, but I'm guessing the bitset shouldn't entail much of a
                 performance penalty, if any.  */
              if (bitset_test (ritem_sees_lookahead_set,
                               (*rulep)->rhs - ritem))
                {
                  goto_number source =
                    map_goto (from_state[i],
                              item_number_as_symbol_number (*(*rulep)->rhs));
                  if (i != source && !bitset_test (sources, source))
                    {
                      bitset_set (sources, source);
                      ++nsources;
                      ++(*edge_countsp)[source];
                    }
                }
            }
        }
        if (nsources == 0)
          (*edgesp)[i] = NULL;
        else
          {
            (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
            {
              bitset_iterator biter_source;
              bitset_bindex source;
              int j = 0;
              BITSET_FOR_EACH (biter_source, sources, source, 0)
                (*edgesp)[i][j++] = source;
            }
            (*edgesp)[i][nsources] = END_NODE;
          }
        bitset_zero (sources);
      }
    bitset_free (sources);
  }

  relation_transpose (edgesp, ngotos);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "internal_follow_edges:\n");
      relation_print (*edgesp, ngotos, stderr);
    }
}

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 *   - \c internal_follow_edges was computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post:
 *   - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
 *     is \c ngotos and the number of columns is the maximum number of kernel
 *     items in any state.
 *   - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
 *     \c i include the lookahead set of item \c j in the from state of goto
 *     \c i.
 *   - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
 *     no item \c j in the from state of goto \c i.
 */
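/* Continuing the hypothetical illustration above: if kernel item j of
   goto i's from state is 'B: w . A opt_c' where A is goto i's transition
   nonterminal and opt_c is nullable, then item j's lookahead set belongs
   in goto i's follows, so bit [i][j] is seeded below.  The
   relation_digraph call then closes these bits over the internal follow
   edges computed above.  */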
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
                                  goto_number **internal_follow_edges,
                                  bitsetv *follow_kernel_itemsp)
{
  {
    size_t max_nitems = 0;
    state_number i;
    for (i = 0; i < nstates; ++i)
      if (states[i]->nitems > max_nitems)
        max_nitems = states[i]->nitems;
    *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
  }
  {
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      {
        size_t nitems = states[from_state[i]]->nitems;
        item_number *items = states[from_state[i]]->items;
        size_t j;
        for (j = 0; j < nitems; ++j)
          /* If this item has this goto and if all subsequent symbols in this
             RHS (if any) are nullable nonterminals, then record this item as
             one whose lookahead set is included in this goto's follows.  */
          if (item_number_is_symbol_number (ritem[items[j]])
              && item_number_as_symbol_number (ritem[items[j]])
                == states[to_state[i]]->accessing_symbol
              && bitset_test (ritem_sees_lookahead_set, items[j]))
            bitset_set ((*follow_kernel_itemsp)[i], j);
      }
  }
  relation_digraph (internal_follow_edges, ngotos, follow_kernel_itemsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "follow_kernel_items:\n");
      debug_bitsetv (*follow_kernel_itemsp);
    }
}

/**
 * \pre
 *   - \c *edgesp and \c edge_counts were computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post
 *   - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
 *     \c ntokens columns.
 *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
 *     follow (that is, it's computed by internal and successor edges) of goto
 *     \c i.
 *   - Rows of \c *edgesp have been realloc'ed and extended to include
 *     successor follow edges.  \c edge_counts has not been updated.
 */
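/* Roughly: a token shifted directly out of goto i's target state is
   always in goto i's follows, and if that target state has a transition
   on a nullable nonterminal, goto i also inherits the follows of the
   successor goto on that nonterminal.  For instance (hypothetically), if
   the goto on A leads to a state that shifts ')' and has a transition on
   nullable opt_c, then ')' and the follows of the goto on opt_c are all
   always follows of the goto on A.  */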
static void
ielr_compute_always_follows (goto_number ***edgesp,
                             int const edge_counts[],
                             bitsetv *always_followsp)
{
  *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
  {
    goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
    goto_number i;
    for (i = 0; i < ngotos; ++i)
      {
        goto_number nedges = edge_counts[i];
        {
          int j;
          transitions *trans = states[to_state[i]]->transitions;
          FOR_EACH_SHIFT (trans, j)
            bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
          for (; j < trans->num; ++j)
            {
              symbol_number sym = TRANSITION_SYMBOL (trans, j);
              if (nullable[sym - ntokens])
                edge_array[nedges++] = map_goto (to_state[i], sym);
            }
        }
        if (nedges - edge_counts[i])
          {
            (*edgesp)[i] =
              xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
            memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
                    (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
            (*edgesp)[i][nedges] = END_NODE;
          }
      }
    free (edge_array);
  }
  relation_digraph (*edgesp, ngotos, always_followsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "always follow edges:\n");
      relation_print (*edgesp, ngotos, stderr);
      fprintf (stderr, "always_follows:\n");
      debug_bitsetv (*always_followsp);
    }
}

/**
 * \post
 *   - \c result is a new array of size \c ::nstates.
 *   - <tt>result[i]</tt> is an array of pointers to the predecessor
 *     <tt>state</tt>'s of state \c i.
 *   - The last element of such an array is \c NULL.
 */
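/* These predecessor arrays are what let ielr_item_has_lookahead below
   walk the parser's transitions backwards: a kernel item's lookahead set
   is assembled from the goto follows or item lookahead sets of every
   predecessor state.  */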
static state ***
ielr_compute_predecessors (void)
{
  state_number i;
  int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
  state ***result = xnmalloc (nstates, sizeof *result);
  for (i = 0; i < nstates; ++i)
    predecessor_counts[i] = 0;
  for (i = 0; i < nstates; ++i)
    {
      int j;
      for (j = 0; j < states[i]->transitions->num; ++j)
        ++predecessor_counts[states[i]->transitions->states[j]->number];
    }
  for (i = 0; i < nstates; ++i)
    {
      result[i] = xnmalloc (predecessor_counts[i] + 1, sizeof *result[i]);
      result[i][predecessor_counts[i]] = NULL;
      predecessor_counts[i] = 0;
    }
  for (i = 0; i < nstates; ++i)
    {
      int j;
      for (j = 0; j < states[i]->transitions->num; ++j)
        {
          state_number k = states[i]->transitions->states[j]->number;
          result[k][predecessor_counts[k]++] = states[i];
        }
    }
  free (predecessor_counts);
  return result;
}

/**
 * \post
 *   - \c *follow_kernel_itemsp and \c *always_followsp were computed by
 *     \c ielr_compute_follow_kernel_items and
 *     \c ielr_compute_always_follows.
 *   - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
 *     by \c ielr_compute_predecessors.
 */
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
                               bitsetv *always_followsp,
                               state ****predecessorsp)
{
  goto_number **edges;
  int *edge_counts;
  {
    bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
    ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
                                        &edges, &edge_counts);
    ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
                                      follow_kernel_itemsp);
    bitset_free (ritem_sees_lookahead_set);
  }
  ielr_compute_always_follows (&edges, edge_counts, always_followsp);
  {
    int i;
    for (i = 0; i < ngotos; ++i)
      free (edges[i]);
  }
  free (edges);
  free (edge_counts);
  if (predecessorsp)
    *predecessorsp = ielr_compute_predecessors ();
}

/**
 * \note
 *   - FIXME: It might be an interesting experiment to compare the space and
 *     time efficiency of computing \c item_lookahead_sets either:
 *     - Fully up front.
 *     - Just-in-time, as implemented below.
 *     - Not at all.  That is, just let annotations continue even when
 *       unnecessary.
 */
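/* In outline: for a kernel item 'lhs: X . rest' whose dot sits just after
   the first RHS symbol, the lookahead set is the union over all
   predecessor states of the follows of their gotos on lhs; for a kernel
   item whose dot is further right, it is the union of the lookahead sets
   of the corresponding items (dot one symbol to the left) in all
   predecessor states, computed recursively.  Results are cached in
   item_lookahead_sets so each set is computed at most once.  */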
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
                         symbol_number lookahead, state ***predecessors,
                         bitset **item_lookahead_sets)
{
  if (!item_lookahead_sets[s->number])
    {
      size_t i;
      item_lookahead_sets[s->number] =
        xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
      for (i = 0; i < s->nitems; ++i)
        item_lookahead_sets[s->number][i] = NULL;
    }
  if (!item_lookahead_sets[s->number][item])
    {
      item_lookahead_sets[s->number][item] =
        bitset_create (ntokens, BITSET_FIXED);
      /* If this kernel item is the beginning of a RHS, it must be the kernel
         item in the start state, and so its LHS has no follows and no goto to
         check.  If, instead, this kernel item is the successor of the start
         state's kernel item, there are still no follows and no goto.  This
         situation is fortunate because we want to avoid the - 2 below in both
         cases.

         Actually, IELR(1) should never invoke this function for either of
         those cases because (1) follow_kernel_items will never reference a
         kernel item for this RHS because the end token blocks sight of the
         lookahead set from the RHS's only nonterminal, and (2) no reduction
         has a lookback dependency on this lookahead set.  Nevertheless, I
         didn't change this test to an aver just in case the usage of this
         function evolves to need those two cases.  In both cases, the current
         implementation returns the right result.  */
      if (s->items[item] > 1)
        {
          /* If the LHS symbol of this item isn't known (because this is a
             top-level invocation), go get it.  */
          if (!lhs)
            {
              unsigned int i;
              for (i = s->items[item];
                   !item_number_is_rule_number (ritem[i]);
                   ++i)
                ;
              lhs = rules[item_number_as_rule_number (ritem[i])].lhs->number;
            }
          /* If this kernel item is next to the beginning of the RHS, then
             check all predecessors' goto follows for the LHS.  */
          if (item_number_is_rule_number (ritem[s->items[item] - 2]))
            {
              state **predecessor;
              aver (lhs != accept->number);
              for (predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                bitset_or (item_lookahead_sets[s->number][item],
                           item_lookahead_sets[s->number][item],
                           goto_follows[map_goto ((*predecessor)->number,
                                                  lhs)]);
            }
          /* If this kernel item is later in the RHS, then check all
             predecessor items' lookahead sets.  */
          else
            {
              state **predecessor;
              for (predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                {
                  size_t predecessor_item;
                  for (predecessor_item = 0;
                       predecessor_item < (*predecessor)->nitems;
                       ++predecessor_item)
                    if ((*predecessor)->items[predecessor_item]
                        == s->items[item] - 1)
                      break;
                  aver (predecessor_item != (*predecessor)->nitems);
                  ielr_item_has_lookahead (*predecessor, lhs,
                                           predecessor_item, 0 /*irrelevant*/,
                                           predecessors, item_lookahead_sets);
                  bitset_or (item_lookahead_sets[s->number][item],
                             item_lookahead_sets[s->number][item],
                             item_lookahead_sets[(*predecessor)->number]
                                                [predecessor_item]);
                }
            }
        }
    }
  return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}

/**
 * \pre
 *   - \c follow_kernel_items, \c always_follows, and \c predecessors
 *     were computed by \c ielr_compute_auxiliary_tables.
 * \post
 *   - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
 *     points to a new array of size \c ::nstates.
 *   - For <tt>0 <= i < ::nstates</tt>:
 *     - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
 *       node for <tt>states[i]</tt>.
 *     - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
 *       node for <tt>states[i]</tt>.
 *   - <tt>*max_annotationsp</tt> is the maximum number of annotations per
 *     state.
 */
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
                               bitsetv always_follows, state ***predecessors,
                               AnnotationIndex *max_annotationsp,
                               InadequacyList ***inadequacy_listsp,
                               AnnotationList ***annotation_listsp,
                               struct obstack *annotations_obstackp)
{
  bitset **item_lookahead_sets =
    xnmalloc (nstates, sizeof *item_lookahead_sets);
  AnnotationIndex *annotation_counts =
    xnmalloc (nstates, sizeof *annotation_counts);
  ContributionIndex max_contributions = 0;
  unsigned int total_annotations = 0;
  state_number i;

  *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
  *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
  for (i = 0; i < nstates; ++i)
    {
      item_lookahead_sets[i] = NULL;
      (*inadequacy_listsp)[i] = NULL;
      (*annotation_listsp)[i] = NULL;
      annotation_counts[i] = 0;
    }
  {
    InadequacyListNodeCount inadequacy_list_node_count = 0;
    for (i = 0; i < nstates; ++i)
      AnnotationList__compute_from_inadequacies (
        states[i], follow_kernel_items, always_follows, predecessors,
        item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
        annotation_counts, &max_contributions, annotations_obstackp,
        &inadequacy_list_node_count);
  }
  *max_annotationsp = 0;
  for (i = 0; i < nstates; ++i)
    {
      if (annotation_counts[i] > *max_annotationsp)
        *max_annotationsp = annotation_counts[i];
      total_annotations += annotation_counts[i];
    }
  if (trace_flag & trace_ielr)
    {
      for (i = 0; i < nstates; ++i)
        {
          fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
          AnnotationList__debug ((*annotation_listsp)[i],
                                 states[i]->nitems, 2);
        }
      fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
      fprintf (stderr, "Average number of annotations per state: %f\n",
               (float)total_annotations/nstates);
      fprintf (stderr, "Max number of annotations per state: %d\n",
               *max_annotationsp);
      fprintf (stderr, "Max number of contributions per annotation: %d\n",
               max_contributions);
    }
  for (i = 0; i < nstates; ++i)
    if (item_lookahead_sets[i])
      {
        size_t j;
        for (j = 0; j < states[i]->nitems; ++j)
          if (item_lookahead_sets[i][j])
            bitset_free (item_lookahead_sets[i][j]);
        free (item_lookahead_sets[i]);
      }
  free (item_lookahead_sets);
  free (annotation_counts);
}

typedef struct state_list {
  struct state_list *next;
  state *state;
  /** Has this state been recomputed as a successor of another state?  */
  bool recomputedAsSuccessor;
  /**
   * \c NULL iff all lookahead sets are empty.  <tt>lookaheads[i] = NULL</tt>
   * iff the lookahead set on item \c i is empty.
   */
  bitset *lookaheads;
  /**
   * \c nextIsocore is the next state in a circular linked list of all states
   * with the same core.  \c lr0Isocore is the isocore originally computed by
   * \c generate_states in LR0.c.
   */
  struct state_list *lr0Isocore;
  struct state_list *nextIsocore;
} state_list;
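
/* Each state starts alone on its isocore ring: ielr_split_states
   initializes lr0Isocore and nextIsocore to point back at the state's own
   node.  When ielr_compute_state cannot merge lookaheads into any
   existing isocore, it links a new state_list node into this ring, so
   every state sharing a given LR(0) core remains reachable from any other
   one.  */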

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - <tt>n->class = nterm_sym</tt>.
 * \post
 *   - \c follow_set contains the follow set for the goto on nonterminal \c n
 *     in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
 */
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
                              bitsetv always_follows, state_list *s,
                              symbol *n, bitset follow_set)
{
  goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
  bitset_copy (follow_set, always_follows[n_goto]);
  if (s->lookaheads)
    {
      bitset_iterator biter_item;
      bitset_bindex item;
      BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
        if (s->lookaheads[item])
          bitset_or (follow_set, follow_set, s->lookaheads[item]);
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - \c lookahead_filter was computed by
 *     \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
 *     of \c t.
 *   - The number of rows in \c lookaheads is at least the number of items in
 *     \c t, and the number of columns is \c ::ntokens.
 * \post
 *   - <tt>lookaheads[i][j]</tt> is set iff both:
 *     - <tt>lookahead_filter[i][j]</tt> is set.
 *     - The isocore of \c t that will be the transition successor of \c s will
 *       inherit from \c s token \c j into the lookahead set of item \c i.
 */
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
                         state_list *s, state *t, bitsetv lookahead_filter,
                         bitsetv lookaheads)
{
  size_t s_item = 0;
  size_t t_item;
  bitsetv_zero (lookaheads);
  for (t_item = 0; t_item < t->nitems; ++t_item)
    {
      /* If this kernel item is the beginning of a RHS, it must be the
         kernel item in the start state, but t is supposed to be a successor
         state.  If, instead, this kernel item is the successor of the start
         state's kernel item, the lookahead set is empty.  This second case is
         a special case to avoid the - 2 below, but the next successor can be
         handled fine without special casing it.  */
      aver (t->items[t_item] != 0);
      if (t->items[t_item] > 1
          && !bitset_empty_p (lookahead_filter[t_item]))
        {
          if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
            {
              unsigned int rule_item;
              for (rule_item = t->items[t_item];
                   !item_number_is_rule_number (ritem[rule_item]);
                   ++rule_item)
                ;
              ielr_compute_goto_follow_set (
                follow_kernel_items, always_follows, s,
                rules[item_number_as_rule_number (ritem[rule_item])].lhs,
                lookaheads[t_item]);
            }
          else if (s->lookaheads)
            {
              /* We don't have to start the s item search at the beginning
                 every time because items from both states are sorted by their
                 indices in ritem.  */
              for (; s_item < s->state->nitems; ++s_item)
                if (s->state->items[s_item] == t->items[t_item] - 1)
                  break;
              aver (s_item != s->state->nitems);
              if (s->lookaheads[s_item])
                bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
            }
          bitset_and (lookaheads[t_item],
                      lookaheads[t_item], lookahead_filter[t_item]);
        }
    }
}
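
/* ielr_compute_lookaheads is the eager counterpart of
   ielr_item_has_lookahead above: rather than computing one item's
   lookahead set on demand by searching all predecessors, it computes the
   filtered lookahead sets of all of t's kernel items at once from the
   single predecessor s currently being processed.  */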

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and all bits in \c work2 are set.
 *     - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
 *   - The number of rows in each of \c lookaheads and \c work2 is the maximum
 *     number of items in any state.  The number of columns in each is
 *     \c ::ntokens.
 *   - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
 *   - \c ::nstates is the total number of states, some not yet fully computed,
 *     in the list ending at \c *last_statep.  It is at least the number of
 *     original LR(0) states.
 *   - The size of \c work1 is at least the number of annotations for the LR(0)
 *     isocore of \c t.
 * \post
 *   - Either:
 *     - In the case that <tt>annotation_lists != NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
 *       permitted by the annotations for the original LR(0) isocore of \c t.
 *       If this changed the lookaheads in that isocore, those changes were
 *       propagated to all already computed transition successors recursively,
 *       possibly resulting in the splitting of some of those successors.
 *     - In the case that <tt>annotation_lists = NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
 *       isocore's lookahead sets were identical to those specified by
 *       <tt>lookaheads \@pre</tt>.
 *     - If no such merge was permitted, a new isocore of \c t containing
 *       <tt>lookaheads \@pre</tt> was appended to the state list whose
 *       previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
 *       incremented.  It was also appended to \c t's isocore list.
 *   - <tt>*tp</tt> = the isocore of \c t into which
 *     <tt>lookaheads \@pre</tt> was placed/merged.
 *   - \c lookaheads, \c work1, and \c work2 may have been altered.
 */
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
                    AnnotationList **annotation_lists, state *t,
                    bitsetv lookaheads, state_list **last_statep,
                    ContributionIndex work1[], bitsetv work2, state **tp)
{
  state_list *lr0_isocore = t->state_list->lr0Isocore;
  state_list **this_isocorep;
  bool has_lookaheads;

  /* Determine whether there's an isocore of t with which these lookaheads can
     be merged.  */
  {
    AnnotationIndex ai;
    AnnotationList *a;
    if (annotation_lists)
      for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
           a;
           ++ai, a = a->next)
        work1[ai] =
          AnnotationList__computeDominantContribution (
            a, lr0_isocore->state->nitems, lookaheads, false);
    for (this_isocorep = &t->state_list;
         this_isocorep == &t->state_list || *this_isocorep != t->state_list;
         this_isocorep = &(*this_isocorep)->nextIsocore)
      {
        if (!(*this_isocorep)->recomputedAsSuccessor)
          break;
        if (annotation_lists)
          {
            for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
                 a;
                 ++ai, a = a->next)
              {
                if (work1[ai] != ContributionIndex__none)
                  {
                    /* This isocore compatibility test depends on the fact
                       that, if the dominant contributions are the same for the
                       two isocores, then merging their lookahead sets will not
                       produce a state with a different dominant contribution.
                       */
                    ContributionIndex ci =
                      AnnotationList__computeDominantContribution (
                        a, lr0_isocore->state->nitems,
                        (*this_isocorep)->lookaheads, false);
                    if (ci != ContributionIndex__none && work1[ai] != ci)
                      break;
                  }
              }
            if (!a)
              break;
          }
        else
          {
            size_t i;
            for (i = 0; i < t->nitems; ++i)
              {
                if (!(*this_isocorep)->lookaheads
                    || !(*this_isocorep)->lookaheads[i])
                  {
                    if (!bitset_empty_p (lookaheads[i]))
                      break;
                  }
                /* bitset_equal_p uses the size of the first argument,
                   so lookaheads[i] must be the second argument.  */
                else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
                                          lookaheads[i]))
                  break;
              }
            if (i == t->nitems)
              break;
          }
      }
  }

  has_lookaheads = false;
  {
    size_t i;
    for (i = 0; i < lr0_isocore->state->nitems; ++i)
      if (!bitset_empty_p (lookaheads[i]))
        {
          has_lookaheads = true;
          break;
        }
  }

  /* Merge with an existing isocore.  */
  if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
    {
      bool new_lookaheads = false;
      *tp = (*this_isocorep)->state;

      /* Merge lookaheads into the state and record whether any of them are
         actually new.  */
      if (has_lookaheads)
        {
          size_t i;
          if (!(*this_isocorep)->lookaheads)
            {
              (*this_isocorep)->lookaheads =
                xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
              for (i = 0; i < t->nitems; ++i)
                (*this_isocorep)->lookaheads[i] = NULL;
            }
          for (i = 0; i < t->nitems; ++i)
            if (!bitset_empty_p (lookaheads[i]))
              {
                if (!(*this_isocorep)->lookaheads[i])
                  (*this_isocorep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                bitset_andn (lookaheads[i],
                             lookaheads[i], (*this_isocorep)->lookaheads[i]);
                bitset_or ((*this_isocorep)->lookaheads[i],
                           lookaheads[i], (*this_isocorep)->lookaheads[i]);
                if (!bitset_empty_p (lookaheads[i]))
                  new_lookaheads = true;
              }
        }

      /* If new lookaheads were merged, propagate those lookaheads to the
         successors, possibly splitting them.  If *tp is being recomputed for
         the first time, this isn't necessary because the main
         ielr_split_states loop will handle the successors later.  */
      if (!(*this_isocorep)->recomputedAsSuccessor)
        (*this_isocorep)->recomputedAsSuccessor = true;
      else if (new_lookaheads)
        {
          int i;
          /* When merging demands identical lookahead sets, it is impossible to
             merge new lookaheads.  */
          aver (annotation_lists);
          for (i = 0; i < (*tp)->transitions->num; ++i)
            {
              state *t2 = (*tp)->transitions->states[i];
              /* At any time, there's at most one state for which we have so
                 far initially recomputed only some of its successors in the
                 main ielr_split_states loop.  Because we recompute successors
                 in order, we can just stop at the first such successor.  Of
                 course, *tp might be a state some of whose successors have
                 been recomputed as successors of other states rather than as
                 successors of *tp.  It's fine if we go ahead and propagate to
                 some of those.  We'll propagate to them again (but stop when
                 we see nothing changes) and to the others when we reach *tp in
                 the main ielr_split_states loop later.  */
              if (!t2->state_list->recomputedAsSuccessor)
                break;
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t2->state_list->lr0Isocore->state->number],
                t2->nitems, work2);
              ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                       (*this_isocorep), t2, work2,
                                       lookaheads);
              /* FIXME: If splitting t2 here, it's possible that lookaheads
                 that had already propagated from *tp to t2 will be left in t2
                 after *tp has been removed as t2's predecessor:
                 - We're going to recompute all lookaheads in phase 4, so these
                   extra lookaheads won't appear in the final parser table.
                 - If t2 has just one annotation, then these extra lookaheads
                   cannot alter the dominating contribution to the associated
                   inadequacy and thus cannot needlessly prevent a future merge
                   of some new state with t2.  We can be sure of this because:
                   - The fact that we're splitting t2 now means that some
                     predecessors (at least one) other than *tp must be
                     propagating contributions to t2.
                   - The fact that t2 was merged in the first place means that,
                     if *tp propagated any contributions, the dominating
                     contribution must be the same as that from those other
                     predecessors.
                   - Thus, if some new state to be merged with t2 in the future
                     proves to be compatible with the contributions propagated
                     by the other predecessors, it will also be compatible with
                     the contributions made by the extra lookaheads left behind
                     by *tp.
                 - However, if t2 has more than one annotation and these extra
                   lookaheads contribute to one of their inadequacies, it's
                   possible these extra lookaheads may needlessly prevent a
                   future merge with t2.  For example:
                   - Let's say there's an inadequacy A that makes the split
                     necessary as follows:
                     - There's currently just one other predecessor and it
                       propagates to t2 some contributions to inadequacy A.
                     - The new lookaheads that we were attempting to propagate
                       from *tp to t2 made contributions to inadequacy A with a
                       different dominating contribution than those from that
                       other predecessor.
                     - The extra lookaheads either make no contribution to
                       inadequacy A or have the same dominating contribution as
                       the contributions from the other predecessor.  Either
                       way, as explained above, they can't prevent a future
                       merge.
                   - Let's say there's an inadequacy B that causes the trouble
                     with future merges as follows:
                     - The extra lookaheads make contributions to inadequacy B.
                     - Those extra contributions did not prevent the original
                       merge to create t2 because the other predecessor
                       propagates to t2 no contributions to inadequacy B.
                     - Thus, those extra contributions may prevent a future
                       merge with t2 even though the merge would be fine if *tp
                       had not left them behind.
                 - Is the latter case common enough to worry about?
                 - Perhaps we should track all predecessors and iterate them
                   now to recreate t2 without those extra lookaheads.  */
              ielr_compute_state (follow_kernel_items, always_follows,
                                  annotation_lists, t2, lookaheads,
                                  last_statep, work1, work2,
                                  &(*tp)->transitions->states[i]);
            }
        }
    }

  /* Create a new isocore.  */
  else
    {
      state_list *old_isocore = *this_isocorep;
      (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
      *last_statep = *this_isocorep;
      (*last_statep)->state = *tp = state_new_isocore (t);
      (*tp)->state_list = *last_statep;
      (*last_statep)->recomputedAsSuccessor = true;
      (*last_statep)->next = NULL;
      (*last_statep)->lookaheads = NULL;
      if (has_lookaheads)
        {
          size_t i;
          (*last_statep)->lookaheads =
            xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
          for (i = 0; i < t->nitems; ++i)
            {
              if (bitset_empty_p (lookaheads[i]))
                (*last_statep)->lookaheads[i] = NULL;
              else
                {
                  (*last_statep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                  bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
                }
            }
        }
      (*last_statep)->lr0Isocore = lr0_isocore;
      (*last_statep)->nextIsocore = old_isocore;
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and <tt>max_annotations = 0</tt>.
 *     - \c annotation_lists and \c max_annotations were computed by
 *       \c ielr_compute_annotation_lists.
 * \post
 *   - \c ::states is of size \c ::nstates (which might be greater than
 *     <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
 *     inadequacy.  \c annotation_lists was used to determine state
 *     compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
 *     LR(1) state compatibility test was used.
 *   - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
 *     computed in all states.  TV_IELR_PHASE4 was pushed while they were
 *     computed from item lookahead sets.
 */
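/* Note that newly split isocores are appended to the list that the main
   loop below is traversing, so each new state's transition successors are
   recomputed in turn before the loop terminates.  */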
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
                   AnnotationList **annotation_lists,
                   AnnotationIndex max_annotations)
{
  state_list *first_state;
  state_list *last_state;
  bitsetv lookahead_filter = NULL;
  bitsetv lookaheads;

  /* Set up state list and some reusable bitsets.  */
  {
    size_t max_nitems = 0;
    state_number i;
    state_list **nodep = &first_state;
    for (i = 0; i < nstates; ++i)
      {
        *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
        (*nodep)->state = states[i];
        (*nodep)->recomputedAsSuccessor = false;
        (*nodep)->lookaheads = NULL;
        (*nodep)->lr0Isocore = *nodep;
        (*nodep)->nextIsocore = *nodep;
        nodep = &(*nodep)->next;
        if (states[i]->nitems > max_nitems)
          max_nitems = states[i]->nitems;
      }
    *nodep = NULL;
    lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
    if (!annotation_lists)
      bitsetv_ones (lookahead_filter);
    lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
  }

  /* Recompute states.  */
  {
    ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
    state_list *this_state;
    for (this_state = first_state; this_state; this_state = this_state->next)
      {
        state *s = this_state->state;
        int i;
        for (i = 0; i < s->transitions->num; ++i)
          {
            state *t = s->transitions->states[i];
            if (annotation_lists)
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t->state_list->lr0Isocore->state->number],
                t->nitems, lookahead_filter);
            ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                     this_state, t, lookahead_filter,
                                     lookaheads);
            ielr_compute_state (follow_kernel_items, always_follows,
                                annotation_lists, t, lookaheads, &last_state,
                                work, lookahead_filter,
                                &s->transitions->states[i]);
          }
      }
    free (work);
  }

  bitsetv_free (lookahead_filter);
  bitsetv_free (lookaheads);

  /* Store states back in the states array.  */
  states = xnrealloc (states, nstates, sizeof *states);
  {
    state_list *node;
    for (node = first_state; node; node = node->next)
      states[node->state->number] = node->state;
  }

  /* In the case of canonical LR(1), copy item lookahead sets to reduction
     lookahead sets.  */
  if (!annotation_lists)
    {
      state_list *node;
      timevar_push (TV_IELR_PHASE4);
      initialize_LA ();
      for (node = first_state; node; node = node->next)
        if (!node->state->consistent)
          {
            size_t i = 0;
            item_number *itemset = node->state->items;
            size_t r;
            for (r = 0; r < node->state->reductions->num; ++r)
              {
                rule *this_rule = node->state->reductions->rules[r];
                bitset lookahead_set =
                  node->state->reductions->lookahead_tokens[r];
                if (item_number_is_rule_number (*this_rule->rhs))
                  ielr_compute_goto_follow_set (follow_kernel_items,
                                                always_follows, node,
                                                this_rule->lhs, lookahead_set);
                else if (node->lookaheads)
                  {
                    /* We don't need to start the kernel item search back at
                       i=0 because both items and reductions are sorted on rule
                       number.  */
                    while (!item_number_is_rule_number (ritem[itemset[i]])
                           || item_number_as_rule_number (ritem[itemset[i]])
                              != this_rule->number)
                      {
                        ++i;
                        aver (i < node->state->nitems);
                      }
                    if (node->lookaheads[i])
                      bitset_copy (lookahead_set, node->lookaheads[i]);
                  }
              }
          }
      timevar_pop (TV_IELR_PHASE4);
    }

  /* Free state list.  */
  while (first_state)
    {
      state_list *node = first_state;
      if (node->lookaheads)
        {
          size_t i;
          for (i = 0; i < node->state->nitems; ++i)
            if (node->lookaheads[i])
              bitset_free (node->lookaheads[i]);
          free (node->lookaheads);
        }
      first_state = node->next;
      free (node);
    }
}

void
ielr (void)
{
  LrType lr_type;

  /* Examine user options.  */
  {
    char *type = muscle_percent_define_get ("lr.type");
    if (0 == strcmp (type, "lalr"))
      lr_type = LR_TYPE__LALR;
    else if (0 == strcmp (type, "ielr"))
      lr_type = LR_TYPE__IELR;
    else if (0 == strcmp (type, "canonical-lr"))
      lr_type = LR_TYPE__CANONICAL_LR;
    else
      aver (false);
    free (type);
  }
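
  /* For example, a grammar selects this algorithm with a line such as
     '%define lr.type ielr'; "lalr" and "canonical-lr" are the other
     accepted values.  */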

  /* Phase 0: LALR(1).  */
  timevar_push (TV_LALR);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    set_goto_map ();
  else
    lalr ();
  if (lr_type == LR_TYPE__LALR)
    {
      bitsetv_free (goto_follows);
      timevar_pop (TV_LALR);
      return;
    }
  timevar_pop (TV_LALR);

  {
    bitsetv follow_kernel_items;
    bitsetv always_follows;
    InadequacyList **inadequacy_lists = NULL;
    AnnotationList **annotation_lists = NULL;
    struct obstack annotations_obstack;
    AnnotationIndex max_annotations = 0;

    {
      /* Phase 1: Compute Auxiliary Tables.  */
      state ***predecessors;
      timevar_push (TV_IELR_PHASE1);
      ielr_compute_auxiliary_tables (
        &follow_kernel_items, &always_follows,
        lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
      timevar_pop (TV_IELR_PHASE1);

      /* Phase 2: Compute Annotations.  */
      timevar_push (TV_IELR_PHASE2);
      if (lr_type != LR_TYPE__CANONICAL_LR)
        {
          obstack_init (&annotations_obstack);
          ielr_compute_annotation_lists (follow_kernel_items, always_follows,
                                         predecessors, &max_annotations,
                                         &inadequacy_lists, &annotation_lists,
                                         &annotations_obstack);
          {
            state_number i;
            for (i = 0; i < nstates; ++i)
              free (predecessors[i]);
          }
          free (predecessors);
          bitsetv_free (goto_follows);
          lalr_free ();
        }
      timevar_pop (TV_IELR_PHASE2);
    }

    /* Phase 3: Split States.  */
    timevar_push (TV_IELR_PHASE3);
    {
      state_number nstates_lr0 = nstates;
      ielr_split_states (follow_kernel_items, always_follows,
                         annotation_lists, max_annotations);
      if (inadequacy_lists)
        {
          state_number i;
          for (i = 0; i < nstates_lr0; ++i)
            InadequacyList__delete (inadequacy_lists[i]);
        }
    }
    free (inadequacy_lists);
    if (annotation_lists)
      obstack_free (&annotations_obstack, NULL);
    free (annotation_lists);
    bitsetv_free (follow_kernel_items);
    bitsetv_free (always_follows);
    timevar_pop (TV_IELR_PHASE3);
  }

  /* Phase 4: Compute Reduction Lookaheads.  */
  timevar_push (TV_IELR_PHASE4);
  free (goto_map);
  free (from_state);
  free (to_state);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    {
      /* Reduction lookaheads are computed in ielr_split_states above
         but are timed as part of phase 4.  */
      set_goto_map ();
    }
  else
    {
      lalr ();
      bitsetv_free (goto_follows);
    }
  timevar_pop (TV_IELR_PHASE4);
}