/* Bison Grammar Scanner                             -*- C -*-

   Copyright (C) 2002-2012 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

%option debug nodefault noinput nounput noyywrap never-interactive
%option prefix="gram_" outfile="lex.yy.c"

%{
/* Work around a bug in flex 2.5.31.  See Debian bug 333231
   <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>.  */
#undef gram_wrap
#define gram_wrap() 1

/* Paste the "gram_" prefix (the same one given to %option prefix
   above) onto an identifier.  Defined before flex-scanner.h is
   included; presumably that header is its user.  */
#define FLEX_PREFIX(Id) gram_ ## Id
#include "flex-scanner.h"

#include "complain.h"
#include "files.h"
#include "gram.h"
#include "quotearg.h"
#include "reader.h"
#include "uniqstr.h"

#include <c-ctype.h>
#include <mbswidth.h>
#include <quote.h>

#include "scan-gram.h"

#define YY_DECL GRAM_LEX_DECL

/* Flex runs YY_USER_INIT once, before the first scan: start both
   CODE_START and the cursor at the beginning of the caller's
   location.  The trailing backslash continues the macro onto the
   empty line that follows, so that line must stay empty.  */
#define YY_USER_INIT					\
   code_start = scanner_cursor = loc->start;		\

/* Location of scanner cursor.  */
static boundary scanner_cursor;

/* Flex runs YY_USER_ACTION before the action of every matched rule:
   pass the matched text to location_compute together with LOC and
   the cursor.  */
#define YY_USER_ACTION  location_compute (loc, &scanner_cursor, yytext, yyleng);

/* All input goes through no_cr_read (defined in the user-code
   section below), which removes carriage returns.  */
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))

/* Un-read the token that was just matched: move the cursor column
   back by the display width of the token and hand all of its text
   back to the input with yyless (0).  */
#define ROLLBACK_CURRENT_TOKEN                                  \
  do {                                                          \
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);     \
    yyless (0);                                                 \
  } while (0)

/* A string representing the most recently saved token.  */
static char *last_string;

/* Bracketed identifier.  BRACKETED_ID_STR is non-null while a
   bracketed name has been read but not yet returned as a token;
   BRACKETED_ID_LOC is its location, BRACKETED_ID_START the position
   of the opening '[', and BRACKETED_ID_CONTEXT_STATE the start
   condition to go back to after the closing ']'.  */
static uniqstr bracketed_id_str = 0;
static location bracketed_id_loc;
static boundary bracketed_id_start;
static int bracketed_id_context_state = 0;

/* Release the most recently saved token string.  STRING_FREE is not
   defined in this file (presumably it comes from flex-scanner.h).  */
void
gram_scanner_last_string_free (void)
{
  STRING_FREE;
}

static void handle_syncline (char *, location);
static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *);
static void unexpected_newline (boundary, char const *);

%}
 /* A C-like comment in directives/rules. */
%x SC_YACC_COMMENT
 /* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 /* An identifier was just read in directives/rules.  Special state
    to capture the sequence 'identifier :'. */
%x SC_AFTER_IDENTIFIER

 /* Three types of user code:
    - prologue (code between '%{' '%}' in the first section, before %%);
    - actions, printers, union, etc, (between braces in the middle section);
    - epilogue (everything after the second %%). */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
 /* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
 /* Strings and characters in code. */
%x SC_STRING SC_CHARACTER
 /* Bracketed identifiers support. */
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID

letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
id        {letter}({letter}|[-0-9])*
int       [0-9]+

/* POSIX says that a tag must be both an id and a C union member, but
   historically almost any character is allowed in a tag.  We disallow
   NUL and newline, as this simplifies our implementation.  */
tag      [^\0\n>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
splice   (\\[ \f\t\v]*\n)*

%%
%{
  /* Nesting level of the current code in braces.  */
  int braces_level PACIFY_CC (= 0);

  /* Parent context state, when applicable.  */
  int context_state PACIFY_CC (= 0);

  /* Location of most recent identifier, when applicable.  */
  location id_loc PACIFY_CC (= empty_location);

  /* Where containing code started, when applicable.  Its initial
     value is relevant only when yylex is invoked in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where containing comment or string or character literal started,
     when applicable.  */
  boundary token_start PACIFY_CC (= scanner_cursor);
%}


  /*-----------------------.
  | Scanning white space.  |
  `-----------------------*/

<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
{
  /* Comments and white space.  */
  ","          warn_at (*loc, _("stray ',' treated as white space"));
  [ \f\n\t\v]  |
  "//".*       continue;
  "/*" {
    /* Remember the current start condition: the end of the comment
       (or end of file) switches back to it.  */
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  */
  ^"#line "{int}(" \"".*"\"")?"\n" {
    /* Skip the "#line " prefix; sizeof counts the terminating NUL,
       hence the - 1.  */
    handle_syncline (yytext + sizeof "#line " - 1, *loc);
  }
}


  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/

  /* For directives that are also command line options, the regex must be
        "%..."
     after "[-_]"s are removed, and the directive must match the --long
     option name, with a single string argument.  Otherwise, add exceptions
     to ../build-aux/cross-options.pl.  */

<INITIAL>
{
  "%binary"                         return PERCENT_NONASSOC;
  "%code"                           return PERCENT_CODE;
  "%debug"                          return PERCENT_DEBUG;
  "%default"[-_]"prec"              return PERCENT_DEFAULT_PREC;
  "%define"                         return PERCENT_DEFINE;
  "%defines"                        return PERCENT_DEFINES;
  "%destructor"                     return PERCENT_DESTRUCTOR;
  "%dprec"                          return PERCENT_DPREC;
  "%error"[-_]"verbose"             return PERCENT_ERROR_VERBOSE;
  "%expect"                         return PERCENT_EXPECT;
  "%expect"[-_]"rr"                 return PERCENT_EXPECT_RR;
  "%file-prefix"                    return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"                 return PERCENT_INITIAL_ACTION;
  "%glr-parser"                     return PERCENT_GLR_PARSER;
  "%language"                       return PERCENT_LANGUAGE;
  "%left"                           return PERCENT_LEFT;
  "%lex-param"                      return PERCENT_LEX_PARAM;
  "%locations"                      return PERCENT_LOCATIONS;
  "%merge"                          return PERCENT_MERGE;
  "%name"[-_]"prefix"               return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"      return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"                  return PERCENT_NO_LINES;
  "%nonassoc"                       return PERCENT_NONASSOC;
  "%nondeterministic-parser"        return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                          return PERCENT_NTERM;
  "%output"                         return PERCENT_OUTPUT;
  "%parse-param"                    return PERCENT_PARSE_PARAM;
  "%prec"                           return PERCENT_PREC;
  "%printer"                        return PERCENT_PRINTER;
  "%pure"[-_]"parser"               return PERCENT_PURE_PARSER;
  "%require"                        return PERCENT_REQUIRE;
  "%right"                          return PERCENT_RIGHT;
  "%skeleton"                       return PERCENT_SKELETON;
  "%start"                          return PERCENT_START;
  "%term"                           return PERCENT_TOKEN;
  "%token"                          return PERCENT_TOKEN;
  "%token"[-_]"table"               return PERCENT_TOKEN_TABLE;
  "%type"                           return PERCENT_TYPE;
  "%union"                          return PERCENT_UNION;
  "%verbose"                        return PERCENT_VERBOSE;
  "%yacc"                           return PERCENT_YACC;

  /* Anything else that starts with '%' is reported and then
     skipped: no token is returned for it.  */
  "%"{id}|"%"{notletter}([[:graph:]])+ {
    complain_at (*loc, _("invalid directive: %s"), quote (yytext));
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;
  "<*>"                   return TYPE_TAG_ANY;
  "<>"                    return TYPE_TAG_NONE;

  /* An identifier is not returned right away: its value and location
     are saved, and SC_AFTER_IDENTIFIER decides between ID and
     ID_COLON.  */
  {id} {
    val->uniqstr = uniqstr_new (yytext);
    id_loc = *loc;
    bracketed_id_str = NULL;
    BEGIN SC_AFTER_IDENTIFIER;
  }

  {int} {
    val->integer = scan_integer (yytext, 10, *loc);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    val->integer = scan_integer (yytext, 16, *loc);
    return INT;
  }

  /* Identifiers may not start with a digit.  Yet, don't silently
     accept "1FOO" as "1 FOO".  */
  {int}{id} {
    complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
  }

  /* Characters.  */
  "'"         token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;

  /* Strings. */
  "\""        token_start = loc->start; BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    /* The opening brace is part of the saved code.  braces_level
       counts only the braces nested inside it.  */
    STRING_GROW;
    braces_level = 0;
    code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  /* A type. */
  "<"{tag}">" {
    /* Save the tag without its surrounding angle brackets.  */
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    return TYPE;
  }

  "%%" {
    /* The second "%%" starts the epilogue.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  "[" {
    bracketed_id_str = NULL;
    bracketed_id_start = loc->start;
    bracketed_id_context_state = YY_START;
    BEGIN SC_BRACKETED_ID;
  }

  [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
    complain_at (*loc, "%s: %s",
                 ngettext ("invalid character", "invalid characters", yyleng),
                 quote_mem (yytext, yyleng));
  }

  <<EOF>> {
    loc->start = loc->end = scanner_cursor;
    yyterminate ();
  }
}


  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>
{
  "[" {
    if (bracketed_id_str)
      {
        /* A bracketed name was already read for this identifier:
           give the '[' back and return the identifier now.  */
        ROLLBACK_CURRENT_TOKEN;
        BEGIN SC_RETURN_BRACKETED_ID;
        *loc = id_loc;
        return ID;
      }
    else
      {
        bracketed_id_start = loc->start;
        bracketed_id_context_state = YY_START;
        BEGIN SC_BRACKETED_ID;
      }
  }
  ":" {
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID_COLON;
  }
  . {
    /* Not a colon: the character belongs to the next token.  */
    ROLLBACK_CURRENT_TOKEN;
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID;
  }
  <<EOF>> {
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID;
  }
}

  /*--------------------------------.
  | Scanning bracketed identifiers. |
  `--------------------------------*/

<SC_BRACKETED_ID>
{
  {id} {
    if (bracketed_id_str)
      {
        complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
                     quote (yytext));
      }
    else
      {
        bracketed_id_str = uniqstr_new (yytext);
        bracketed_id_loc = *loc;
      }
  }
  "]" {
    BEGIN bracketed_id_context_state;
    if (bracketed_id_str)
      {
        /* Only when the bracket was opened in INITIAL is the token
           returned here; otherwise the name stays pending in
           bracketed_id_str for the state we go back to.  */
        if (INITIAL == bracketed_id_context_state)
          {
            val->uniqstr = bracketed_id_str;
            bracketed_id_str = 0;
            *loc = bracketed_id_loc;
            return BRACKETED_ID;
          }
      }
    else
      complain_at (*loc, _("an identifier expected"));
  }

  [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
    complain_at (*loc, "%s: %s",
                 ngettext ("invalid character in bracketed name",
                           "invalid characters in bracketed name", yyleng),
                 quote_mem (yytext, yyleng));
  }

  <<EOF>> {
    BEGIN bracketed_id_context_state;
    unexpected_eof (bracketed_id_start, "]");
  }
}

  /* Reached after an ID or ID_COLON that had a bracketed name
     attached: give the current character back and return the
     pending BRACKETED_ID.  */
<SC_RETURN_BRACKETED_ID>
{
  . {
    ROLLBACK_CURRENT_TOKEN;
    val->uniqstr = bracketed_id_str;
    bracketed_id_str = 0;
    *loc = bracketed_id_loc;
    BEGIN INITIAL;
    return BRACKETED_ID;
  }
}


  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial '/ *' is already eaten.  |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>
{
  "*/"     BEGIN context_state;
  .|\n     continue;
  <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
}


  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial '/ *' is already eaten.  |
  `------------------------------------------------------------*/

<SC_COMMENT>
{
  "*"{splice}"/"  STRING_GROW; BEGIN context_state;
  <<EOF>>         unexpected_eof (token_start, "*/"); BEGIN context_state;
}


  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial '//' is already eaten.  |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>
{
  "\n"            STRING_GROW; BEGIN context_state;
  {splice}        STRING_GROW;
  <<EOF>>         BEGIN context_state;
}


  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  "\""|"\n" {
    /* A newline ends the string as well, after a complaint.  */
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "\"");
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return STRING;
  }
  <<EOF>> {
    unexpected_eof (token_start, "\"");
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return STRING;
  }
}

  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten.                       |
  `----------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  "'"|"\n" {
    STRING_FINISH;
    loc->start = token_start;
    val->character = last_string[0];
    {
      /* FIXME: Eventually, make these errors.  */
      if (last_string[0] == '\0')
        {
          warn_at (*loc, _("empty character literal"));
          /* '\0' seems dangerous even if we are about to complain.  */
          val->character = '\'';
        }
      else if (last_string[1] != '\0')
        warn_at (*loc, _("extra characters in character literal"));
    }
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "'");
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
  <<EOF>> {
    /* Same checks as for the closing quote above, followed by the
       end-of-file complaint.  */
    STRING_FINISH;
    loc->start = token_start;
    val->character = last_string[0];
    {
      /* FIXME: Eventually, make these errors.  */
      if (last_string[0] == '\0')
        {
          warn_at (*loc, _("empty character literal"));
          /* '\0' seems dangerous even if we are about to complain.  */
          val->character = '\'';
        }
      else if (last_string[1] != '\0')
        warn_at (*loc, _("extra characters in character literal"));
    }
    unexpected_eof (token_start, "'");
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
}

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
{
  \0        complain_at (*loc, _("invalid null character"));
}


  /*----------------------------.
  | Decode escaped characters.  |
  `----------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  \\[0-7]{1,3} {
    /* Octal escape.  Zero and values above UCHAR_MAX are rejected.  */
    unsigned long int c = strtoul (yytext + 1, NULL, 8);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\x[0-9abcdefABCDEF]+ {
    /* Hexadecimal escape, with the same range check as above.  */
    verify (UCHAR_MAX < ULONG_MAX);
    unsigned long int c = strtoul (yytext + 2, NULL, 16);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\a   obstack_1grow (&obstack_for_string, '\a');
  \\b   obstack_1grow (&obstack_for_string, '\b');
  \\f   obstack_1grow (&obstack_for_string, '\f');
  \\n   obstack_1grow (&obstack_for_string, '\n');
  \\r   obstack_1grow (&obstack_for_string, '\r');
  \\t   obstack_1grow (&obstack_for_string, '\t');
  \\v   obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);

  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    /* Universal character name.  Zero and negative results from
       convert_ucn_to_byte are rejected.  */
    int c = convert_ucn_to_byte (yytext);
    if (c <= 0)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }
  \\(.|\n)      {
    char const *p = yytext + 1;
    /* Quote only if escaping won't make the character visible.  */
    if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
      p = quote (p);
    else
      p = quotearg_style_mem (escape_quoting_style, p, 1);
    complain_at (*loc, _("invalid character after \\-escape: %s"), p);
  }
}

  /*--------------------------------------------.
  | Scanning user-code characters and strings.  |
  `--------------------------------------------*/

<SC_CHARACTER,SC_STRING>
{
  {splice}|\\{splice}[^\n\[\]]  STRING_GROW;
}

<SC_CHARACTER>
{
  "'"           STRING_GROW; BEGIN context_state;
  \n            unexpected_newline (token_start, "'"); BEGIN context_state;
  <<EOF>>       unexpected_eof (token_start, "'"); BEGIN context_state;
}

<SC_STRING>
{
  "\""          STRING_GROW; BEGIN context_state;
  \n            unexpected_newline (token_start, "\""); BEGIN context_state;
  <<EOF>>       unexpected_eof (token_start, "\""); BEGIN context_state;
}


  /*---------------------------------------------------.
  | Strings, comments etc. can be found in user code.  |
  `---------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  "'" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    STRING_GROW;
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}



  /*-----------------------------------------------------------.
  | Scanning some code in braces (actions). The initial "{" is |
  | already eaten.                                             |
  `-----------------------------------------------------------*/

<SC_BRACED_CODE>
{
  /* The digraph spellings of the braces change the nesting level
     just like the braces themselves, but only the plain closing
     brace below can end the code.  */
  "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
  "%"{splice}">"      STRING_GROW; braces_level--;
  "}" {
    obstack_1grow (&obstack_for_string, '}');

    --braces_level;
    if (braces_level < 0)
      {
        STRING_FINISH;
        loc->start = code_start;
        val->code = last_string;
        BEGIN INITIAL;
        return BRACED_CODE;
      }
  }

  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
     (as '<' '<%').  */
  "<"{splice}"<"  STRING_GROW;

  <<EOF>> {
    unexpected_eof (code_start, "}");
    STRING_FINISH;
    loc->start = code_start;
    val->code = last_string;
    BEGIN INITIAL;
    return BRACED_CODE;
  }
}


  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  "%}" {
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return PROLOGUE;
  }

  <<EOF>> {
    unexpected_eof (code_start, "%}");
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return PROLOGUE;
  }
}


  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).                                       |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  <<EOF>> {
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return EPILOGUE;
  }
}


  /*-----------------------------------------------------.
  | By default, grow the string obstack with the input.  |
  `-----------------------------------------------------*/

<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.      |
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n   STRING_GROW;

%%

/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes read.  Remove '\r' from input, treating \r\n
   and isolated \r as \n.
*/ 726 727static size_t 728no_cr_read (FILE *fp, char *buf, size_t size) 729{ 730 size_t bytes_read = fread (buf, 1, size, fp); 731 if (bytes_read) 732 { 733 char *w = memchr (buf, '\r', bytes_read); 734 if (w) 735 { 736 char const *r = ++w; 737 char const *lim = buf + bytes_read; 738 739 for (;;) 740 { 741 /* Found an '\r'. Treat it like '\n', but ignore any 742 '\n' that immediately follows. */ 743 w[-1] = '\n'; 744 if (r == lim) 745 { 746 int ch = getc (fp); 747 if (ch != '\n' && ungetc (ch, fp) != ch) 748 break; 749 } 750 else if (*r == '\n') 751 r++; 752 753 /* Copy until the next '\r'. */ 754 do 755 { 756 if (r == lim) 757 return w - buf; 758 } 759 while ((*w++ = *r++) != '\r'); 760 } 761 762 return w - buf; 763 } 764 } 765 766 return bytes_read; 767} 768 769 770 771/*------------------------------------------------------. 772| Scan NUMBER for a base-BASE integer at location LOC. | 773`------------------------------------------------------*/ 774 775static unsigned long int 776scan_integer (char const *number, int base, location loc) 777{ 778 verify (INT_MAX < ULONG_MAX); 779 unsigned long int num = strtoul (number, NULL, base); 780 781 if (INT_MAX < num) 782 { 783 complain_at (loc, _("integer out of range: %s"), quote (number)); 784 num = INT_MAX; 785 } 786 787 return num; 788} 789 790 791/*------------------------------------------------------------------. 792| Convert universal character name UCN to a single-byte character, | 793| and return that character. Return -1 if UCN does not correspond | 794| to a single-byte character. | 795`------------------------------------------------------------------*/ 796 797static int 798convert_ucn_to_byte (char const *ucn) 799{ 800 verify (UCHAR_MAX <= INT_MAX); 801 unsigned long int code = strtoul (ucn + 2, NULL, 16); 802 803 /* FIXME: Currently we assume Unicode-compatible unibyte characters 804 on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). 
On 805 non-ASCII hosts we support only the portable C character set. 806 These limitations should be removed once we add support for 807 multibyte characters. */ 808 809 if (UCHAR_MAX < code) 810 return -1; 811 812#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e) 813 { 814 /* A non-ASCII host. Use CODE to index into a table of the C 815 basic execution character set, which is guaranteed to exist on 816 all Standard C platforms. This table also includes '$', '@', 817 and '`', which are not in the basic execution character set but 818 which are unibyte characters on all the platforms that we know 819 about. */ 820 static signed char const table[] = 821 { 822 '\0', -1, -1, -1, -1, -1, -1, '\a', 823 '\b', '\t', '\n', '\v', '\f', '\r', -1, -1, 824 -1, -1, -1, -1, -1, -1, -1, -1, 825 -1, -1, -1, -1, -1, -1, -1, -1, 826 ' ', '!', '"', '#', '$', '%', '&', '\'', 827 '(', ')', '*', '+', ',', '-', '.', '/', 828 '0', '1', '2', '3', '4', '5', '6', '7', 829 '8', '9', ':', ';', '<', '=', '>', '?', 830 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 831 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 832 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 833 'X', 'Y', 'Z', '[', '\\', ']', '^', '_', 834 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 835 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 836 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 837 'x', 'y', 'z', '{', '|', '}', '~' 838 }; 839 840 code = code < sizeof table ? table[code] : -1; 841 } 842#endif 843 844 return code; 845} 846 847 848/*---------------------------------------------------------------------. 849| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. 
| 850`---------------------------------------------------------------------*/ 851 852static void 853handle_syncline (char *args, location loc) 854{ 855 char *file; 856 unsigned long int lineno = strtoul (args, &file, 10); 857 if (INT_MAX <= lineno) 858 { 859 warn_at (loc, _("line number overflow")); 860 lineno = INT_MAX; 861 } 862 863 file = mbschr (file, '"'); 864 if (file) 865 { 866 *mbschr (file + 1, '"') = '\0'; 867 current_file = uniqstr_new (file + 1); 868 } 869 boundary_set (&scanner_cursor, current_file, lineno, 1); 870} 871 872 873/*----------------------------------------------------------------. 874| For a token or comment starting at START, report message MSGID, | 875| which should say that an end marker was found before | 876| the expected TOKEN_END. | 877`----------------------------------------------------------------*/ 878 879static void 880unexpected_end (boundary start, char const *msgid, char const *token_end) 881{ 882 location loc; 883 loc.start = start; 884 loc.end = scanner_cursor; 885 token_end = quote (token_end); 886 /* Instead of '\'', display "'". */ 887 if (!strcmp (token_end, "'\\''")) 888 token_end = "\"'\""; 889 complain_at (loc, _(msgid), token_end); 890} 891 892 893/*------------------------------------------------------------------------. 894| Report an unexpected EOF in a token or comment starting at START. | 895| An end of file was encountered and the expected TOKEN_END was missing. | 896`------------------------------------------------------------------------*/ 897 898static void 899unexpected_eof (boundary start, char const *token_end) 900{ 901 unexpected_end (start, N_("missing %s at end of file"), token_end); 902} 903 904 905/*----------------------------------------. 906| Likewise, but for unexpected newlines. 
| START and TOKEN_END are handed on to unexpected_end unchanged.       |
`----------------------------------------*/

static void
unexpected_newline (boundary start, char const *token_end)
{
  unexpected_end (start, N_("missing %s at end of line"), token_end);
}


/*-------------------------.
| Initialize the scanner.  |
`-------------------------*/

void
gram_scanner_initialize (void)
{
  /* Set up the obstack in which the scanner rules accumulate the
     text of strings, character literals and user code.  */
  obstack_init (&obstack_for_string);
}


/*-----------------------------------------------.
| Free all the memory allocated to the scanner.  |
`-----------------------------------------------*/

void
gram_scanner_free (void)
{
  /* A null second argument asks obstack_free to release everything
     held by the obstack.  */
  obstack_free (&obstack_for_string, 0);
  /* Reclaim Flex's buffers.  */
  yylex_destroy ();
}