%{/* Bison Grammar Parser                             -*- C -*-

   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301  USA
*/

#include <config.h>
#include "system.h"

#include "complain.h"
#include "conflicts.h"
#include "files.h"
#include "getargs.h"
#include "gram.h"
#include "muscle_tab.h"
#include "quotearg.h"
#include "reader.h"
#include "symlist.h"
#include "strverscmp.h"

/* Compute the location of a reduced rule with lloc_default (defined
   in the epilogue) instead of the parser's built-in default.  */
#define YYLLOC_DEFAULT(Current, Rhs, N)  (Current) = lloc_default (Rhs, N)
static YYLTYPE lloc_default (YYLTYPE const *, int);

/* Print locations in debug traces with location_print.  */
#define YY_LOCATION_PRINT(File, Loc) \
  location_print (File, Loc)

static void version_check (location const *loc, char const *version);

/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
   FIXME: depends on the undocumented availability of YYLLOC.  */
#undef  yyerror
#define yyerror(Msg) \
  gram_error (&yylloc, Msg)
static void gram_error (location const *, char const *);

static void add_param (char const *, char *, location);

/* State shared between the actions below.  CURRENT_CLASS and
   CURRENT_TYPE apply to the symbols of the declaration being parsed,
   CURRENT_LHS and CURRENT_LHS_LOCATION describe the rule being
   parsed, and CURRENT_PREC is incremented once per precedence
   declaration.  */
static symbol_class current_class = unknown_sym;
static uniqstr current_type = NULL;
static symbol *current_lhs;
static location current_lhs_location;
static int current_prec = 0;

/* Map the YYTYPE_* macros onto the "fast" integer types when the
   platform provides them.  */
#ifdef UINT_FAST8_MAX
# define YYTYPE_UINT8 uint_fast8_t
#endif
#ifdef INT_FAST8_MAX
# define YYTYPE_INT8 int_fast8_t
#endif
#ifdef UINT_FAST16_MAX
# define YYTYPE_UINT16 uint_fast16_t
#endif
#ifdef INT_FAST16_MAX
# define YYTYPE_INT16 int_fast16_t
#endif
%}

%debug
%verbose
%defines
%locations
%pure-parser
%error-verbose
%name-prefix="gram_"

%initial-action
{
  /* Bison's grammar can have initial empty locations, hence a default
     location is needed. */
  @$.start.file   = @$.end.file   = current_file;
  @$.start.line   = @$.end.line   = 1;
  @$.start.column = @$.end.column = 0;
}

/* Semantic value types; see the %type declarations below for the
   symbols that carry each of them.  */
%union
{
  symbol *symbol;
  symbol_list *list;
  int integer;
  char *chars;
  assoc assoc;
  uniqstr uniqstr;
};

/* Define the tokens together with their human representation.  */
%token GRAM_EOF 0 "end of file"
%token STRING     "string"
%token INT        "integer"

%token PERCENT_TOKEN       "%token"
%token PERCENT_NTERM       "%nterm"

%token PERCENT_TYPE        "%type"
%token PERCENT_DESTRUCTOR  "%destructor {...}"
%token PERCENT_PRINTER     "%printer {...}"

%token PERCENT_UNION       "%union {...}"

%token PERCENT_LEFT        "%left"
%token PERCENT_RIGHT       "%right"
%token PERCENT_NONASSOC    "%nonassoc"

%token PERCENT_PREC          "%prec"
%token PERCENT_DPREC         "%dprec"
%token PERCENT_MERGE         "%merge"


/*----------------------.
| Global Declarations.  |
`----------------------*/

%token
  PERCENT_DEBUG           "%debug"
  PERCENT_DEFAULT_PREC    "%default-prec"
  PERCENT_DEFINE          "%define"
  PERCENT_DEFINES         "%defines"
  PERCENT_ERROR_VERBOSE   "%error-verbose"
  PERCENT_EXPECT          "%expect"
  PERCENT_EXPECT_RR       "%expect-rr"
  PERCENT_FILE_PREFIX     "%file-prefix"
  PERCENT_GLR_PARSER      "%glr-parser"
  PERCENT_INITIAL_ACTION  "%initial-action {...}"
  PERCENT_LEX_PARAM       "%lex-param {...}"
  PERCENT_LOCATIONS       "%locations"
  PERCENT_NAME_PREFIX     "%name-prefix"
  PERCENT_NO_DEFAULT_PREC "%no-default-prec"
  PERCENT_NO_LINES        "%no-lines"
  PERCENT_NONDETERMINISTIC_PARSER
                          "%nondeterministic-parser"
  PERCENT_OUTPUT          "%output"
  PERCENT_PARSE_PARAM     "%parse-param {...}"
  PERCENT_PURE_PARSER     "%pure-parser"
  PERCENT_REQUIRE         "%require"
  PERCENT_SKELETON        "%skeleton"
  PERCENT_START           "%start"
  PERCENT_TOKEN_TABLE     "%token-table"
  PERCENT_VERBOSE         "%verbose"
  PERCENT_YACC            "%yacc"
;

%token TYPE            "type"
%token EQUAL           "="
%token SEMICOLON       ";"
%token PIPE            "|"
%token ID              "identifier"
%token ID_COLON        "identifier:"
%token PERCENT_PERCENT "%%"
%token PROLOGUE        "%{...%}"
%token EPILOGUE        "epilogue"
%token BRACED_CODE     "{...}"


/* Strings and the directives that carry braced code have a <chars>
   value; plain strings are traced quoted, code is traced in braces.  */
%type <chars> STRING string_content
              "%destructor {...}"
              "%initial-action {...}"
              "%lex-param {...}"
              "%parse-param {...}"
              "%printer {...}"
              "%union {...}"
              PROLOGUE EPILOGUE
%printer { fprintf (stderr, "\"%s\"", $$); }
              STRING string_content
%printer { fprintf (stderr, "{\n%s\n}", $$); }
              "%destructor {...}"
              "%initial-action {...}"
              "%lex-param {...}"
              "%parse-param {...}"
              "%printer {...}"
              "%union {...}"
              PROLOGUE EPILOGUE
%type <uniqstr> TYPE
%printer { fprintf (stderr, "<%s>", $$); } TYPE
%type <integer> INT
%printer { fprintf (stderr, "%d", $$); } INT
%type <symbol> ID symbol string_as_id
%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
%type <symbol> ID_COLON
%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
%type <assoc> precedence_declarator
%type <list>  symbols.1
%%

/* A grammar file: declarations, "%%", the rules, and an optional
   epilogue.  */
input:
  declarations "%%" grammar epilogue.opt
;


        /*------------------------------------.
        | Declarations: before the first %%.  |
        `------------------------------------*/

declarations:
  /* Nothing */
| declarations declaration
;

/* One declaration.  Most directives only record a flag or a string
   for the rest of Bison to use.  */
declaration:
  grammar_declaration
| PROLOGUE                                 { prologue_augment ($1, @1); }
| "%debug"                                 { debug_flag = true; }
| "%define" string_content
    {
      /* A %define without a value defines the muscle to "1".  */
      static char one[] = "1";
      muscle_insert ($2, one);
    }
| "%define" string_content string_content  { muscle_insert ($2, $3); }
| "%defines"                               { defines_flag = true; }
| "%error-verbose"                         { error_verbose = true; }
| "%expect" INT                            { expected_sr_conflicts = $2; }
| "%expect-rr" INT                         { expected_rr_conflicts = $2; }
| "%file-prefix" "=" string_content        { spec_file_prefix = $3; }
| "%glr-parser"
    {
      nondeterministic_parser = true;
      glr_parser = true;
    }
| "%initial-action {...}"
    {
      muscle_code_grow ("initial_action", $1, @1);
    }
| "%lex-param {...}"                       { add_param ("lex_param", $1, @1); }
| "%locations"                             { locations_flag = true; }
| "%name-prefix" "=" string_content        { spec_name_prefix = $3; }
| "%no-lines"                              { no_lines_flag = true; }
| "%nondeterministic-parser"               { nondeterministic_parser = true; }
| "%output" "=" string_content             { spec_outfile = $3; }
| "%parse-param {...}"                     { add_param ("parse_param", $1, @1); }
| "%pure-parser"                           { pure_parser = true; }
| "%require" string_content                { version_check (&@2, $2); }
| "%skeleton" string_content               { skeleton = $2; }
| "%token-table"                           { token_table_flag = true; }
| "%verbose"                               { report_flag = report_states; }
| "%yacc"                                  { yacc_flag = true; }
| /*FIXME: Err?  What is this horror doing here? */ ";"
;

/* Declarations that are also accepted in the body of the grammar
   (see rules_or_grammar_declaration).  */
grammar_declaration:
  precedence_declaration
| symbol_declaration
| "%start" symbol
    {
      grammar_start_symbol_set ($2, @2);
    }
| "%union {...}"
    {
      char const *body = $1;

      if (typed)
        {
          /* Concatenate the union bodies, turning the first one's
             trailing '}' into '\n', and omitting the second one's '{'.  */
          char *code = muscle_find ("stype");
          code[strlen (code) - 1] = '\n';
          body++;
        }

      typed = true;
      muscle_code_grow ("stype", body, @1);
    }
| "%destructor {...}" symbols.1
    {
      /* Attach the destructor code to every listed symbol, then
         release the list.  */
      symbol_list *list;
      for (list = $2; list; list = list->next)
        symbol_destructor_set (list->sym, $1, @1);
      symbol_list_free ($2);
    }
| "%printer {...}" symbols.1
    {
      /* Attach the printer code to every listed symbol, then release
         the list.  */
      symbol_list *list;
      for (list = $2; list; list = list->next)
        symbol_printer_set (list->sym, $1, @1);
      symbol_list_free ($2);
    }
| "%default-prec"
    {
      default_prec = true;
    }
| "%no-default-prec"
    {
      default_prec = false;
    }
;

/* %nterm and %token set CURRENT_CLASS with a mid-rule action so that
   it is in effect while the symbol_defs.1 are parsed; both
   CURRENT_CLASS and CURRENT_TYPE are reset once the declaration is
   complete.  */
symbol_declaration:
  "%nterm" { current_class = nterm_sym; } symbol_defs.1
    {
      current_class = unknown_sym;
      current_type = NULL;
    }
| "%token" { current_class = token_sym; } symbol_defs.1
    {
      current_class = unknown_sym;
      current_type = NULL;
    }
| "%type" TYPE symbols.1
    {
      symbol_list *list;
      for (list = $3; list; list = list->next)
        symbol_type_set (list->sym, $2, @2);
      symbol_list_free ($3);
    }
;

/* Each precedence declaration gets a precedence level one higher
   than the previous one; all of its symbols share that level, the
   associativity, and the optional type.  */
precedence_declaration:
  precedence_declarator type.opt symbols.1
    {
      symbol_list *list;
      ++current_prec;
      for (list = $3; list; list = list->next)
        {
          symbol_type_set (list->sym, current_type, @2);
          symbol_precedence_set (list->sym, current_prec, $1, @1);
        }
      symbol_list_free ($3);
      current_type = NULL;
    }
;

precedence_declarator:
  "%left"     { $$ = left_assoc; }
| "%right"    { $$ = right_assoc; }
| "%nonassoc" { $$ = non_assoc; }
;

/* The optional type of a precedence declaration, recorded in
   CURRENT_TYPE.  */
type.opt:
  /* Nothing. */ { current_type = NULL; }
| TYPE           { current_type = $1; }
;

/* One or more nonterminals to be %typed. */

symbols.1:
  symbol            { $$ = symbol_list_new ($1, @1); }
| symbols.1 symbol  { $$ = symbol_list_prepend ($1, $2, @2); }
;

/* One token definition.  A TYPE on its own only changes CURRENT_TYPE
   for the definitions that follow it.  */
symbol_def:
  TYPE
     {
       current_type = $1;
     }
| ID
     {
       symbol_class_set ($1, current_class, @1, true);
       symbol_type_set ($1, current_type, @1);
     }
| ID INT
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
    }
| ID string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_make_alias ($1, $2, @$);
    }
| ID INT string_as_id
    {
      symbol_class_set ($1, current_class, @1, true);
      symbol_type_set ($1, current_type, @1);
      symbol_user_token_number_set ($1, $2, @2);
      symbol_make_alias ($1, $3, @$);
    }
;

/* One or more symbol definitions. */
symbol_defs.1:
  symbol_def
| symbol_defs.1 symbol_def
;


        /*------------------------------------------.
        | The grammar section: between the two %%.  |
        `------------------------------------------*/

grammar:
  rules_or_grammar_declaration
| grammar rules_or_grammar_declaration
;

/* As a Bison extension, one can use the grammar declarations in the
   body of the grammar.  */
rules_or_grammar_declaration:
  rules
| grammar_declaration ";"
| error ";"
    {
      /* Resume normal parsing after the ";" that ends the erroneous
         construct.  */
      yyerrok;
    }
;

/* Remember the left-hand side and its location before the
   right-hand sides are parsed: every rhs starts a new rule for it.  */
rules:
  ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
;

rhses.1:
  rhs                { grammar_current_rule_end (@1); }
| rhses.1 "|" rhs    { grammar_current_rule_end (@3); }
| rhses.1 ";"
;

rhs:
  /* Nothing.  */
    { grammar_current_rule_begin (current_lhs, current_lhs_location); }
| rhs symbol
    { grammar_current_rule_symbol_append ($2, @2); }
| rhs action
| rhs "%prec" symbol
    { grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
    { grammar_current_rule_dprec_set ($3, @3); }
| rhs "%merge" TYPE
    { grammar_current_rule_merge_set ($3, @3); }
;

symbol:
  ID              { $$ = $1; }
| string_as_id    { $$ = $1; }
;

/* Handle the semantics of an action specially, with a mid-rule
   action, so that grammar_current_rule_action_append is invoked
   immediately after the braced code is read by the scanner.

   This implementation relies on the LALR(1) parsing algorithm.
   If grammar_current_rule_action_append were executed in a normal
   action for this rule, then when the input grammar contains two
   successive actions, the scanner would have to read both actions
   before reducing this rule.  That wouldn't work, since the scanner
   relies on all preceding input actions being processed by
   grammar_current_rule_action_append before it scans the next
   action.  */
action:
    { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
  BRACED_CODE
;

/* A string used as an ID: quote it.  */
string_as_id:
  STRING
    {
      $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
      symbol_class_set ($$, token_sym, @1, false);
    }
;

/* A string used for its contents.  Don't quote it.  */
string_content:
  STRING
    { $$ = $1; }
;


epilogue.opt:
  /* Nothing.  */
| "%%" EPILOGUE
    {
      muscle_code_grow ("epilogue", $2, @2);
      scanner_last_string_free ();
    }
;

%%


/* Return the location of the left-hand side of a rule whose
   right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
   the right-hand side, and return an empty location equal to the end
   boundary of RHS[0] if the right-hand side is empty.  */

static YYLTYPE
lloc_default (YYLTYPE const *rhs, int n)
{
  int i;
  YYLTYPE loc;

  /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
     The bug is fixed in 7.4.2m, but play it safe for now.  */
  loc.start = rhs[n].end;
  loc.end = rhs[n].end;

  /* Ignore empty nonterminals at the start of the right-hand side.
     Do not bother to ignore them at the end of the right-hand side,
     since empty nonterminals have the same end as their predecessors.  */
  for (i = 1; i <= n; i++)
    if (! equal_boundaries (rhs[i].start, rhs[i].end))
      {
        loc.start = rhs[i].start;
        break;
      }

  return loc;
}


/* Add a lex-param or a parse-param (depending on TYPE) with
   declaration DECL and location LOC.

   DECL is the braced code as passed by the grammar actions; it is
   modified in place (the closing brace and the blanks before it are
   cut off).  The parameter name is the last identifier found in DECL;
   if there is none, complain at LOC.  In either case the scanner's
   last string is released before returning.  */

static void
add_param (char const *type, char *decl, location loc)
{
  /* Letters and underscore first, digits last: the first
     "sizeof alphanum - 10" bytes are the characters that may start
     an identifier.  The array is deliberately not NUL-terminated, so
     memchr never matches a '\0'.  */
  static char const alphanum[26 + 26 + 1 + 10] =
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "_"
    "0123456789";
  char const *name_start = NULL;
  char *p;

  /* Stop on last actual character.  Along the way, record the start
     of each identifier, so that NAME_START ends up on the last one.  */
  for (p = decl; p[1]; p++)
    if ((p == decl
         || ! memchr (alphanum, p[-1], sizeof alphanum))
        && memchr (alphanum, p[0], sizeof alphanum - 10))
      name_start = p;

  /* Strip the surrounding '{' and '}', and any blanks just inside
     the braces.  */
  while (*--p == ' ' || *p == '\t')
    continue;
  p[1] = '\0';
  while (*++decl == ' ' || *decl == '\t')
    continue;

  if (! name_start)
    complain_at (loc, _("missing identifier in parameter declaration"));
  else
    {
      char *name;
      size_t name_len;

      /* Measure the identifier that starts at NAME_START.  */
      for (name_len = 1;
           memchr (alphanum, name_start[name_len], sizeof alphanum);
           name_len++)
        continue;

      name = xmalloc (name_len + 1);
      memcpy (name, name_start, name_len);
      name[name_len] = '\0';
      muscle_pair_list_grow (type, decl, name);
      free (name);
    }

  scanner_last_string_free ();
}

/* Handle "%require VERSION": if VERSION compares greater than
   PACKAGE_VERSION according to strverscmp, complain at LOC and exit
   with status 63.  */

static void
version_check (location const *loc, char const *version)
{
  if (strverscmp (version, PACKAGE_VERSION) > 0)
    {
      complain_at (*loc, _("require bison %s, but have %s"),
                   version, PACKAGE_VERSION);
      exit (63);
    }
}

/* Report the syntax error message MSG at location LOC.  This is what
   the yyerror macro defined in the prologue expands to.  */

static void
gram_error (location const *loc, char const *msg)
{
  complain_at (*loc, "%s", msg);
}

/* Return the entry of the parser's yytname table for the token
   number TYPE, after translation by YYTRANSLATE.  */

char const *
token_name (int type)
{
  return yytname[YYTRANSLATE (type)];
}