1%{/* Bison Grammar Parser                             -*- C -*-
2
3   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5   This file is part of Bison, the GNU Compiler Compiler.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20   02110-1301  USA
21*/
22
23#include <config.h>
24#include "system.h"
25
26#include "complain.h"
27#include "conflicts.h"
28#include "files.h"
29#include "getargs.h"
30#include "gram.h"
31#include "muscle_tab.h"
32#include "quotearg.h"
33#include "reader.h"
34#include "symlist.h"
35#include "strverscmp.h"
36
/* Compute the location of a reduced rule with lloc_default (defined
   in the epilogue of this file).  */
#define YYLLOC_DEFAULT(Current, Rhs, N)  (Current) = lloc_default (Rhs, N)
static YYLTYPE lloc_default (YYLTYPE const *, int);

/* Print the location LOC on FILE with location_print.  */
#define YY_LOCATION_PRINT(File, Loc) \
	  location_print (File, Loc)

/* Complain at *LOC and exit if VERSION is newer than PACKAGE_VERSION.  */
static void version_check (location const *loc, char const *version);

/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
   FIXME: depends on the undocumented availability of YYLLOC.  */
#undef  yyerror
#define yyerror(Msg) \
	gram_error (&yylloc, Msg)
static void gram_error (location const *, char const *);

/* Record a "%lex-param {...}" or "%parse-param {...}" declaration.  */
static void add_param (char const *, char *, location);

/* State shared by the semantic actions below.  */

/* Class given to the symbols being declared; set by the mid-rule
   actions of "%token" and "%nterm".  */
static symbol_class current_class = unknown_sym;
/* Type tag applied to the symbols being declared; set when a TYPE is
   read and reset to NULL at the end of the declaration.  */
static uniqstr current_type = 0;
/* Left-hand side of the rule being parsed, and its location.  */
static symbol *current_lhs;
static location current_lhs_location;
/* Incremented once per precedence declaration, then assigned to every
   symbol of that declaration.  */
static int current_prec = 0;

/* Use the "fast" integer types when their limit macros are defined.
   NOTE(review): the YYTYPE_* macros are presumably read by the parser
   skeleton; this file does not show where -- confirm.  */
#ifdef UINT_FAST8_MAX
# define YYTYPE_UINT8 uint_fast8_t
#endif
#ifdef INT_FAST8_MAX
# define YYTYPE_INT8 int_fast8_t
#endif
#ifdef UINT_FAST16_MAX
# define YYTYPE_UINT16 uint_fast16_t
#endif
#ifdef INT_FAST16_MAX
# define YYTYPE_INT16 int_fast16_t
#endif
72%}
73
/* Options used to generate this parser.  Each one is listed exactly
   once.  */
%debug
%verbose
%defines
%locations
%pure-parser
%error-verbose
%name-prefix="gram_"
82
%initial-action
{
  /* The grammar may begin with empty constructs (DECLARATIONS can be
     empty), so a default location is needed: an empty range at
     line 1, column 0 of CURRENT_FILE.  */
  @$.end.file = current_file;
  @$.start.file = current_file;
  @$.end.line = 1;
  @$.start.line = 1;
  @$.end.column = 0;
  @$.start.column = 0;
}
91
/* The kinds of semantic value a symbol of this grammar may carry.
   The %type declarations further down say which symbols use which
   member.  */
%union
{
  /* ID, ID_COLON, symbol, string_as_id.  */
  symbol *symbol;
  /* symbols.1.  */
  symbol_list *list;
  /* INT.  */
  int integer;
  /* STRING, string_content, PROLOGUE, EPILOGUE, and the directive
     tokens spelled with a trailing "{...}".  */
  char *chars;
  /* precedence_declarator.  */
  assoc assoc;
  /* TYPE.  */
  uniqstr uniqstr;
};
102
/* Define the tokens together with their human representation.  The
   quoted string is an alias that the rules below use instead of the
   token name.  */
/* GRAM_EOF is given the token number 0.  */
%token GRAM_EOF 0 "end of file"
%token STRING     "string"
%token INT        "integer"

/* Symbol declarations.  */
%token PERCENT_TOKEN       "%token"
%token PERCENT_NTERM       "%nterm"

/* The tokens whose alias ends with "{...}" have a <chars> semantic
   value (see the %type declarations).  */
%token PERCENT_TYPE        "%type"
%token PERCENT_DESTRUCTOR  "%destructor {...}"
%token PERCENT_PRINTER     "%printer {...}"

%token PERCENT_UNION       "%union {...}"

/* Precedence and associativity declarations.  */
%token PERCENT_LEFT        "%left"
%token PERCENT_RIGHT       "%right"
%token PERCENT_NONASSOC    "%nonassoc"

/* Rule modifiers, used in the right-hand side of rules.  */
%token PERCENT_PREC          "%prec"
%token PERCENT_DPREC         "%dprec"
%token PERCENT_MERGE         "%merge"


/*----------------------.
| Global Declarations.  |
`----------------------*/

%token
  PERCENT_DEBUG           "%debug"
  PERCENT_DEFAULT_PREC    "%default-prec"
  PERCENT_DEFINE          "%define"
  PERCENT_DEFINES         "%defines"
  PERCENT_ERROR_VERBOSE   "%error-verbose"
  PERCENT_EXPECT          "%expect"
  PERCENT_EXPECT_RR	  "%expect-rr"
  PERCENT_FILE_PREFIX     "%file-prefix"
  PERCENT_GLR_PARSER      "%glr-parser"
  PERCENT_INITIAL_ACTION  "%initial-action {...}"
  PERCENT_LEX_PARAM       "%lex-param {...}"
  PERCENT_LOCATIONS       "%locations"
  PERCENT_NAME_PREFIX     "%name-prefix"
  PERCENT_NO_DEFAULT_PREC "%no-default-prec"
  PERCENT_NO_LINES        "%no-lines"
  PERCENT_NONDETERMINISTIC_PARSER
			  "%nondeterministic-parser"
  PERCENT_OUTPUT          "%output"
  PERCENT_PARSE_PARAM     "%parse-param {...}"
  PERCENT_PURE_PARSER     "%pure-parser"
  PERCENT_REQUIRE	  "%require"
  PERCENT_SKELETON        "%skeleton"
  PERCENT_START           "%start"
  PERCENT_TOKEN_TABLE     "%token-table"
  PERCENT_VERBOSE         "%verbose"
  PERCENT_YACC            "%yacc"
;

/* Punctuation, identifiers and code fragments.  */
%token TYPE            "type"
%token EQUAL           "="
%token SEMICOLON       ";"
%token PIPE            "|"
%token ID              "identifier"
%token ID_COLON        "identifier:"
%token PERCENT_PERCENT "%%"
%token PROLOGUE        "%{...%}"
%token EPILOGUE        "epilogue"
%token BRACED_CODE     "{...}"
169
170
/* Semantic value types of the symbols, each followed by the %printer
   that writes such a value to stderr.  */

/* Strings and code fragments.  */
%type <chars> STRING string_content
	      "%destructor {...}"
	      "%initial-action {...}"
	      "%lex-param {...}"
	      "%parse-param {...}"
	      "%printer {...}"
	      "%union {...}"
	      PROLOGUE EPILOGUE
/* Plain strings are printed between double quotes...  */
%printer { fprintf (stderr, "\"%s\"", $$); }
	      STRING string_content
/* ... and code fragments between braces, on lines of their own.  */
%printer { fprintf (stderr, "{\n%s\n}", $$); }
	      "%destructor {...}"
	      "%initial-action {...}"
	      "%lex-param {...}"
	      "%parse-param {...}"
	      "%printer {...}"
	      "%union {...}"
	      PROLOGUE EPILOGUE
/* Type tags are printed between angle brackets.  */
%type <uniqstr> TYPE
%printer { fprintf (stderr, "<%s>", $$); } TYPE
%type <integer> INT
%printer { fprintf (stderr, "%d", $$); } INT
/* Symbols are printed as their tag; ID_COLON adds a trailing colon.  */
%type <symbol> ID symbol string_as_id
%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
%type <symbol> ID_COLON
%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
/* These two nonterminals have no %printer.  */
%type <assoc> precedence_declarator
%type <list>  symbols.1
199%%
200
/* A whole grammar file: the declarations, "%%", the rules, and an
   optional epilogue.  */
input:
  declarations "%%" grammar epilogue.opt
;


	/*------------------------------------.
	| Declarations: before the first %%.  |
	`------------------------------------*/

/* Zero or more declarations.  */
declarations:
  /* Nothing */
| declarations declaration
;
214
/* One declaration of the first section.  Most alternatives only set
   the flag or variable named in their action.  */
declaration:
  grammar_declaration
| PROLOGUE                                 { prologue_augment ($1, @1); }
| "%debug"                                 { debug_flag = true; }
| "%define" string_content
    {
      /* A %define without a value defines the muscle as "1".  */
      static char one[] = "1";
      muscle_insert ($2, one);
    }
| "%define" string_content string_content  { muscle_insert ($2, $3); }
| "%defines"                               { defines_flag = true; }
| "%error-verbose"                         { error_verbose = true; }
| "%expect" INT                            { expected_sr_conflicts = $2; }
| "%expect-rr" INT                         { expected_rr_conflicts = $2; }
| "%file-prefix" "=" string_content        { spec_file_prefix = $3; }
| "%glr-parser"
    {
      /* A GLR parser is also a nondeterministic parser.  */
      nondeterministic_parser = true;
      glr_parser = true;
    }
| "%initial-action {...}"
    {
      muscle_code_grow ("initial_action", $1, @1);
    }
| "%lex-param {...}"                       { add_param ("lex_param", $1, @1); }
| "%locations"                             { locations_flag = true; }
| "%name-prefix" "=" string_content        { spec_name_prefix = $3; }
| "%no-lines"                              { no_lines_flag = true; }
| "%nondeterministic-parser"               { nondeterministic_parser = true; }
| "%output" "=" string_content             { spec_outfile = $3; }
| "%parse-param {...}"                     { add_param ("parse_param", $1, @1); }
| "%pure-parser"                           { pure_parser = true; }
| "%require" string_content                { version_check (&@2, $2); }
| "%skeleton" string_content               { skeleton = $2; }
| "%token-table"                           { token_table_flag = true; }
| "%verbose"
    {
      /* Add the states to the report instead of overwriting
	 REPORT_FLAG, so that report bits already set are kept.  */
      report_flag |= report_states;
    }
| "%yacc"                                  { yacc_flag = true; }
| /*FIXME: Err?  What is this horror doing here? */ ";"
;
254
/* The declarations that are accepted both before the first "%%" and,
   followed by a ";", in the body of the grammar.  */
grammar_declaration:
  precedence_declaration
| symbol_declaration
| "%start" symbol
    { grammar_start_symbol_set ($2, @2); }
| "%union {...}"
    {
      char const *union_body = $1;

      if (typed)
        {
          /* This is not the first %union: merge it into the previous
             one by replacing the closing '}' of the code collected so
             far with a newline, and by skipping the opening '{' of
             the new body.  */
          char *collected = muscle_find ("stype");
          size_t last = strlen (collected) - 1;
          collected[last] = '\n';
          union_body += 1;
        }

      typed = true;
      muscle_code_grow ("stype", union_body, @1);
    }
| "%destructor {...}" symbols.1
    {
      /* Attach the destructor code to each listed symbol.  */
      symbol_list *node = $2;
      while (node)
        {
          symbol_destructor_set (node->sym, $1, @1);
          node = node->next;
        }
      symbol_list_free ($2);
    }
| "%printer {...}" symbols.1
    {
      /* Attach the printer code to each listed symbol.  */
      symbol_list *node = $2;
      while (node)
        {
          symbol_printer_set (node->sym, $1, @1);
          node = node->next;
        }
      symbol_list_free ($2);
    }
| "%default-prec"
    { default_prec = true; }
| "%no-default-prec"
    { default_prec = false; }
;
301
/* Declare nonterminals, tokens, or the type of some symbols.  The
   mid-rule actions set CURRENT_CLASS before the definitions are
   parsed; the final actions reset the shared state.  */
symbol_declaration:
  "%nterm" { current_class = nterm_sym; } symbol_defs.1
    {
      current_type = NULL;
      current_class = unknown_sym;
    }
| "%token" { current_class = token_sym; } symbol_defs.1
    {
      current_type = NULL;
      current_class = unknown_sym;
    }
| "%type" TYPE symbols.1
    {
      /* Give the type tag $2 to each listed symbol.  */
      symbol_list *node = $3;
      while (node)
        {
          symbol_type_set (node->sym, $2, @2);
          node = node->next;
        }
      symbol_list_free ($3);
    }
;
321
/* "%left", "%right" or "%nonassoc", an optional type tag, and the
   symbols concerned.  Each declaration uses a new precedence level.  */
precedence_declaration:
  precedence_declarator type.opt symbols.1
    {
      symbol_list *node = $3;

      current_prec += 1;
      while (node)
        {
          symbol_type_set (node->sym, current_type, @2);
          symbol_precedence_set (node->sym, current_prec, $1, @1);
          node = node->next;
        }
      symbol_list_free ($3);
      current_type = NULL;
    }
;
336
/* The associativity named by a precedence keyword.  */
precedence_declarator:
  "%left"     { $$ = left_assoc; }
| "%right"    { $$ = right_assoc; }
| "%nonassoc" { $$ = non_assoc; }
;

/* An optional type tag, stored in CURRENT_TYPE (NULL when absent).  */
type.opt:
  /* Nothing. */ { current_type = NULL; }
| TYPE           { current_type = $1; }
;
347
/* One or more symbols, collected in a symbol_list.  Used by "%type",
   "%destructor {...}", "%printer {...}" and the precedence
   declarations.  */

symbols.1:
  symbol            { $$ = symbol_list_new ($1, @1); }
| symbols.1 symbol  { $$ = symbol_list_prepend ($1, $2, @2); }
;
354
/* One token definition.  A TYPE does not define a symbol: it only
   sets CURRENT_TYPE, which the following ID alternatives apply.  Each
   ID is given the class CURRENT_CLASS and the type CURRENT_TYPE.  */
symbol_def:
  TYPE
     {
       current_type = $1;
     }
| ID
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
     }
  /* The INT is the user token number of the symbol.  */
| ID INT
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
    }
  /* The string is made an alias of the symbol.  */
| ID string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_make_alias ($1, $2, @$);
    }
  /* Both a user token number and an alias.  */
| ID INT string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
      symbol_make_alias ($1, $3, @$);
    }
;

/* One or more symbol definitions. */
symbol_defs.1:
  symbol_def
| symbol_defs.1 symbol_def
;
392
393
394	/*------------------------------------------.
395	| The grammar section: between the two %%.  |
396	`------------------------------------------*/
397
/* One or more rules or grammar declarations.  */
grammar:
  rules_or_grammar_declaration
| grammar rules_or_grammar_declaration
;

/* As a Bison extension, one can use the grammar declarations in the
   body of the grammar.  */
rules_or_grammar_declaration:
  rules
| grammar_declaration ";"
  /* After a syntax error, resynchronize on the next ";".  */
| error ";"
    {
      yyerrok;
    }
;

/* All the rules of one left-hand side.  The mid-rule action records
   the left-hand side before any right-hand side is parsed, since
   every RHS starts a rule with CURRENT_LHS.  */
rules:
  ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
;

/* One or more right-hand sides separated by "|"; each one ends the
   rule that its RHS began.  Any number of ";" may follow.  */
rhses.1:
  rhs                { grammar_current_rule_end (@1); }
| rhses.1 "|" rhs    { grammar_current_rule_end (@3); }
| rhses.1 ";"
;

/* A right-hand side.  The empty alternative is reduced first, so it
   is where the current rule begins; the other alternatives add one
   item each to the current rule.  */
rhs:
  /* Nothing.  */
    { grammar_current_rule_begin (current_lhs, current_lhs_location); }
| rhs symbol
    { grammar_current_rule_symbol_append ($2, @2); }
| rhs action
| rhs "%prec" symbol
    { grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
    { grammar_current_rule_dprec_set ($3, @3); }
| rhs "%merge" TYPE
    { grammar_current_rule_merge_set ($3, @3); }
;
437
/* A symbol, written either as an identifier or as a string.  */
symbol:
  ID              { $$ = $1; }
| string_as_id    { $$ = $1; }
;

/* Handle the semantics of an action specially, with a mid-rule
   action, so that grammar_current_rule_action_append is invoked
   immediately after the braced code is read by the scanner.

   This implementation relies on the LALR(1) parsing algorithm.
   If grammar_current_rule_action_append were executed in a normal
   action for this rule, then when the input grammar contains two
   successive actions, the scanner would have to read both actions
   before reducing this rule.  That wouldn't work, since the scanner
   relies on all preceding input actions being processed by
   grammar_current_rule_action_append before it scans the next
   action.  */
action:
    /* The code and its location are read from LAST_STRING and
       LAST_BRACED_CODE_LOC, not from the value of BRACED_CODE.  */
    { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
  BRACED_CODE
;

/* A string used as an ID: quote it.  */
string_as_id:
  STRING
    {
      /* The symbol is named after the C-quoted form of the string,
	 and is made a token.  */
      $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
      symbol_class_set ($$, token_sym, @1, false);
    }
;

/* A string used for its contents.  Don't quote it.  */
string_content:
  STRING
    { $$ = $1; }
;


/* The optional epilogue: a second "%%" followed by code that is
   appended to the "epilogue" muscle.  */
epilogue.opt:
  /* Nothing.  */
| "%%" EPILOGUE
    {
      muscle_code_grow ("epilogue", $2, @2);
      scanner_last_string_free ();
    }
;
484
485%%
486
487
488/* Return the location of the left-hand side of a rule whose
489   right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
490   the right-hand side, and return an empty location equal to the end
491   boundary of RHS[0] if the right-hand side is empty.  */
492
493static YYLTYPE
494lloc_default (YYLTYPE const *rhs, int n)
495{
496  int i;
497  YYLTYPE loc;
498
499  /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
500     The bug is fixed in 7.4.2m, but play it safe for now.  */
501  loc.start = rhs[n].end;
502  loc.end = rhs[n].end;
503
504  /* Ignore empty nonterminals the start of the the right-hand side.
505     Do not bother to ignore them at the end of the right-hand side,
506     since empty nonterminals have the same end as their predecessors.  */
507  for (i = 1; i <= n; i++)
508    if (! equal_boundaries (rhs[i].start, rhs[i].end))
509      {
510	loc.start = rhs[i].start;
511	break;
512      }
513
514  return loc;
515}
516
517
518/* Add a lex-param or a parse-param (depending on TYPE) with
519   declaration DECL and location LOC.  */
520
521static void
522add_param (char const *type, char *decl, location loc)
523{
524  static char const alphanum[26 + 26 + 1 + 10] =
525    "abcdefghijklmnopqrstuvwxyz"
526    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
527    "_"
528    "0123456789";
529  char const *name_start = NULL;
530  char *p;
531
532  /* Stop on last actual character.  */
533  for (p = decl; p[1]; p++)
534    if ((p == decl
535	 || ! memchr (alphanum, p[-1], sizeof alphanum))
536	&& memchr (alphanum, p[0], sizeof alphanum - 10))
537      name_start = p;
538
539  /* Strip the surrounding '{' and '}', and any blanks just inside
540     the braces.  */
541  while (*--p == ' ' || *p == '\t')
542    continue;
543  p[1] = '\0';
544  while (*++decl == ' ' || *decl == '\t')
545    continue;
546
547  if (! name_start)
548    complain_at (loc, _("missing identifier in parameter declaration"));
549  else
550    {
551      char *name;
552      size_t name_len;
553
554      for (name_len = 1;
555	   memchr (alphanum, name_start[name_len], sizeof alphanum);
556	   name_len++)
557	continue;
558
559      name = xmalloc (name_len + 1);
560      memcpy (name, name_start, name_len);
561      name[name_len] = '\0';
562      muscle_pair_list_grow (type, decl, name);
563      free (name);
564    }
565
566  scanner_last_string_free ();
567}
568
569static void
570version_check (location const *loc, char const *version)
571{
572  if (strverscmp (version, PACKAGE_VERSION) > 0)
573    {
574      complain_at (*loc, "require bison %s, but have %s",
575		   version, PACKAGE_VERSION);
576      exit (63);
577    }
578}
579
580static void
581gram_error (location const *loc, char const *msg)
582{
583  complain_at (*loc, "%s", msg);
584}
585
586char const *
587token_name (int type)
588{
589  return yytname[YYTRANSLATE (type)];
590}
591