1/* parse.y - parser for flex input */ 2 3%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP 4%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS 5 6%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH 7%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT 8 9%{ 10/*- 11 * Copyright (c) 1990 The Regents of the University of California. 12 * All rights reserved. 13 * 14 * This code is derived from software contributed to Berkeley by 15 * Vern Paxson. 16 * 17 * The United States Government has rights in this work pursuant 18 * to contract no. DE-AC03-76SF00098 between the United States 19 * Department of Energy and the University of California. 20 * 21 * Redistribution and use in source and binary forms with or without 22 * modification are permitted provided that: (1) source distributions retain 23 * this entire copyright notice and comment, and (2) distributions including 24 * binaries display the following acknowledgement: ``This product includes 25 * software developed by the University of California, Berkeley and its 26 * contributors'' in the documentation or other materials provided with the 27 * distribution and in all advertising materials mentioning features or use 28 * of this software. Neither the name of the University nor the names of 29 * its contributors may be used to endorse or promote products derived from 30 * this software without specific prior written permission. 31 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 32 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 33 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 34 */ 35 36/* $Header: /home/daffy/u0/vern/flex/RCS/parse.y,v 2.28 95/04/21 11:51:51 vern Exp $ */ 37 38 39/* Some versions of bison are broken in that they use alloca() but don't 40 * declare it properly. The following is the patented (just kidding!) 41 * #ifdef chud to fix the problem, courtesy of Francois Pinard. 42 */ 43#ifdef YYBISON 44/* AIX requires this to be the first thing in the file. What a piece. */ 45# ifdef _AIX 46 #pragma alloca 47# endif 48#endif 49 50#include "flexdef.h" 51 52/* The remainder of the alloca() cruft has to come after including flexdef.h, 53 * so HAVE_ALLOCA_H is (possibly) defined. 54 */ 55#ifdef YYBISON 56# ifdef __GNUC__ 57# ifndef alloca 58# define alloca __builtin_alloca 59# endif 60# else 61# if HAVE_ALLOCA_H 62# include <alloca.h> 63# else 64# ifdef __hpux 65void *alloca (); 66# else 67# ifdef __TURBOC__ 68# include <malloc.h> 69# else 70char *alloca (); 71# endif 72# endif 73# endif 74# endif 75#endif 76 77/* Bletch, ^^^^ that was ugly! */ 78 79 80int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen; 81int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule; 82 83int *scon_stk; 84int scon_stk_ptr; 85 86static int madeany = false; /* whether we've made the '.' character class */ 87int previous_continued_action; /* whether the previous rule's action was '|' */ 88 89/* Expand a POSIX character class expression. */ 90#define CCL_EXPR(func) \ 91 { \ 92 int c; \ 93 for ( c = 0; c < csize; ++c ) \ 94 if ( isascii(c) && func(c) ) \ 95 ccladd( currccl, c ); \ 96 } 97 98/* While POSIX defines isblank(), it's not ANSI C. */ 99#define IS_BLANK(c) ((c) == ' ' || (c) == '\t') 100 101/* On some over-ambitious machines, such as DEC Alpha's, the default 102 * token type is "long" instead of "int"; this leads to problems with 103 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen 104 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the 105 * following should ensure that the default token type is "int". 106 */ 107#define YYSTYPE int 108 109%} 110 111%% 112goal : initlex sect1 sect1end sect2 initforrule 113 { /* add default rule */ 114 int def_rule; 115 116 pat = cclinit(); 117 cclnegate( pat ); 118 119 def_rule = mkstate( -pat ); 120 121 /* Remember the number of the default rule so we 122 * don't generate "can't match" warnings for it. 123 */ 124 default_rule = num_rules; 125 126 finish_rule( def_rule, false, 0, 0 ); 127 128 for ( i = 1; i <= lastsc; ++i ) 129 scset[i] = mkbranch( scset[i], def_rule ); 130 131 if ( spprdflt ) 132 add_action( 133 "YY_FATAL_ERROR( \"flex scanner jammed\" )" ); 134 else 135 add_action( "ECHO" ); 136 137 add_action( ";\n\tYY_BREAK\n" ); 138 } 139 ; 140 141initlex : 142 { /* initialize for processing rules */ 143 144 /* Create default DFA start condition. */ 145 scinstal( "INITIAL", false ); 146 } 147 ; 148 149sect1 : sect1 startconddecl namelist1 150 | sect1 options 151 | 152 | error 153 { synerr( "unknown error processing section 1" ); } 154 ; 155 156sect1end : SECTEND 157 { 158 check_options(); 159 scon_stk = allocate_integer_array( lastsc + 1 ); 160 scon_stk_ptr = 0; 161 } 162 ; 163 164startconddecl : SCDECL 165 { xcluflg = false; } 166 167 | XSCDECL 168 { xcluflg = true; } 169 ; 170 171namelist1 : namelist1 NAME 172 { scinstal( nmstr, xcluflg ); } 173 174 | NAME 175 { scinstal( nmstr, xcluflg ); } 176 177 | error 178 { synerr( "bad start condition list" ); } 179 ; 180 181options : OPTION_OP optionlist 182 ; 183 184optionlist : optionlist option 185 | 186 ; 187 188option : OPT_OUTFILE '=' NAME 189 { 190 outfilename = copy_string( nmstr ); 191 did_outfilename = 1; 192 } 193 | OPT_PREFIX '=' NAME 194 { prefix = copy_string( nmstr ); } 195 | OPT_YYCLASS '=' NAME 196 { yyclass = copy_string( nmstr ); } 197 ; 198 199sect2 : sect2 scon initforrule flexrule '\n' 200 { scon_stk_ptr = $2; } 201 | sect2 scon '{' sect2 '}' 202 { scon_stk_ptr = $2; } 203 | 204 ; 205 206initforrule : 207 { 208 /* Initialize for a parse of one rule. */ 209 trlcontxt = variable_trail_rule = varlength = false; 210 trailcnt = headcnt = rulelen = 0; 211 current_state_type = STATE_NORMAL; 212 previous_continued_action = continued_action; 213 in_rule = true; 214 215 new_rule(); 216 } 217 ; 218 219flexrule : '^' rule 220 { 221 pat = $2; 222 finish_rule( pat, variable_trail_rule, 223 headcnt, trailcnt ); 224 225 if ( scon_stk_ptr > 0 ) 226 { 227 for ( i = 1; i <= scon_stk_ptr; ++i ) 228 scbol[scon_stk[i]] = 229 mkbranch( scbol[scon_stk[i]], 230 pat ); 231 } 232 233 else 234 { 235 /* Add to all non-exclusive start conditions, 236 * including the default (0) start condition. 237 */ 238 239 for ( i = 1; i <= lastsc; ++i ) 240 if ( ! scxclu[i] ) 241 scbol[i] = mkbranch( scbol[i], 242 pat ); 243 } 244 245 if ( ! bol_needed ) 246 { 247 bol_needed = true; 248 249 if ( performance_report > 1 ) 250 pinpoint_message( 251 "'^' operator results in sub-optimal performance" ); 252 } 253 } 254 255 | rule 256 { 257 pat = $1; 258 finish_rule( pat, variable_trail_rule, 259 headcnt, trailcnt ); 260 261 if ( scon_stk_ptr > 0 ) 262 { 263 for ( i = 1; i <= scon_stk_ptr; ++i ) 264 scset[scon_stk[i]] = 265 mkbranch( scset[scon_stk[i]], 266 pat ); 267 } 268 269 else 270 { 271 for ( i = 1; i <= lastsc; ++i ) 272 if ( ! scxclu[i] ) 273 scset[i] = 274 mkbranch( scset[i], 275 pat ); 276 } 277 } 278 279 | EOF_OP 280 { 281 if ( scon_stk_ptr > 0 ) 282 build_eof_action(); 283 284 else 285 { 286 /* This EOF applies to all start conditions 287 * which don't already have EOF actions. 288 */ 289 for ( i = 1; i <= lastsc; ++i ) 290 if ( ! sceof[i] ) 291 scon_stk[++scon_stk_ptr] = i; 292 293 if ( scon_stk_ptr == 0 ) 294 warn( 295 "all start conditions already have <<EOF>> rules" ); 296 297 else 298 build_eof_action(); 299 } 300 } 301 302 | error 303 { synerr( "unrecognized rule" ); } 304 ; 305 306scon_stk_ptr : 307 { $$ = scon_stk_ptr; } 308 ; 309 310scon : '<' scon_stk_ptr namelist2 '>' 311 { $$ = $2; } 312 313 | '<' '*' '>' 314 { 315 $$ = scon_stk_ptr; 316 317 for ( i = 1; i <= lastsc; ++i ) 318 { 319 int j; 320 321 for ( j = 1; j <= scon_stk_ptr; ++j ) 322 if ( scon_stk[j] == i ) 323 break; 324 325 if ( j > scon_stk_ptr ) 326 scon_stk[++scon_stk_ptr] = i; 327 } 328 } 329 330 | 331 { $$ = scon_stk_ptr; } 332 ; 333 334namelist2 : namelist2 ',' sconname 335 336 | sconname 337 338 | error 339 { synerr( "bad start condition list" ); } 340 ; 341 342sconname : NAME 343 { 344 if ( (scnum = sclookup( nmstr )) == 0 ) 345 format_pinpoint_message( 346 "undeclared start condition %s", 347 nmstr ); 348 else 349 { 350 for ( i = 1; i <= scon_stk_ptr; ++i ) 351 if ( scon_stk[i] == scnum ) 352 { 353 format_warn( 354 "<%s> specified twice", 355 scname[scnum] ); 356 break; 357 } 358 359 if ( i > scon_stk_ptr ) 360 scon_stk[++scon_stk_ptr] = scnum; 361 } 362 } 363 ; 364 365rule : re2 re 366 { 367 if ( transchar[lastst[$2]] != SYM_EPSILON ) 368 /* Provide final transition \now/ so it 369 * will be marked as a trailing context 370 * state. 371 */ 372 $2 = link_machines( $2, 373 mkstate( SYM_EPSILON ) ); 374 375 mark_beginning_as_normal( $2 ); 376 current_state_type = STATE_NORMAL; 377 378 if ( previous_continued_action ) 379 { 380 /* We need to treat this as variable trailing 381 * context so that the backup does not happen 382 * in the action but before the action switch 383 * statement. If the backup happens in the 384 * action, then the rules "falling into" this 385 * one's action will *also* do the backup, 386 * erroneously. 387 */ 388 if ( ! varlength || headcnt != 0 ) 389 warn( 390 "trailing context made variable due to preceding '|' action" ); 391 392 /* Mark as variable. */ 393 varlength = true; 394 headcnt = 0; 395 } 396 397 if ( lex_compat || (varlength && headcnt == 0) ) 398 { /* variable trailing context rule */ 399 /* Mark the first part of the rule as the 400 * accepting "head" part of a trailing 401 * context rule. 402 * 403 * By the way, we didn't do this at the 404 * beginning of this production because back 405 * then current_state_type was set up for a 406 * trail rule, and add_accept() can create 407 * a new state ... 408 */ 409 add_accept( $1, 410 num_rules | YY_TRAILING_HEAD_MASK ); 411 variable_trail_rule = true; 412 } 413 414 else 415 trailcnt = rulelen; 416 417 $$ = link_machines( $1, $2 ); 418 } 419 420 | re2 re '$' 421 { synerr( "trailing context used twice" ); } 422 423 | re '$' 424 { 425 headcnt = 0; 426 trailcnt = 1; 427 rulelen = 1; 428 varlength = false; 429 430 current_state_type = STATE_TRAILING_CONTEXT; 431 432 if ( trlcontxt ) 433 { 434 synerr( "trailing context used twice" ); 435 $$ = mkstate( SYM_EPSILON ); 436 } 437 438 else if ( previous_continued_action ) 439 { 440 /* See the comment in the rule for "re2 re" 441 * above. 442 */ 443 warn( 444 "trailing context made variable due to preceding '|' action" ); 445 446 varlength = true; 447 } 448 449 if ( lex_compat || varlength ) 450 { 451 /* Again, see the comment in the rule for 452 * "re2 re" above. 453 */ 454 add_accept( $1, 455 num_rules | YY_TRAILING_HEAD_MASK ); 456 variable_trail_rule = true; 457 } 458 459 trlcontxt = true; 460 461 eps = mkstate( SYM_EPSILON ); 462 $$ = link_machines( $1, 463 link_machines( eps, mkstate( '\n' ) ) ); 464 } 465 466 | re 467 { 468 $$ = $1; 469 470 if ( trlcontxt ) 471 { 472 if ( lex_compat || (varlength && headcnt == 0) ) 473 /* Both head and trail are 474 * variable-length. 475 */ 476 variable_trail_rule = true; 477 else 478 trailcnt = rulelen; 479 } 480 } 481 ; 482 483 484re : re '|' series 485 { 486 varlength = true; 487 $$ = mkor( $1, $3 ); 488 } 489 490 | series 491 { $$ = $1; } 492 ; 493 494 495re2 : re '/' 496 { 497 /* This rule is written separately so the 498 * reduction will occur before the trailing 499 * series is parsed. 500 */ 501 502 if ( trlcontxt ) 503 synerr( "trailing context used twice" ); 504 else 505 trlcontxt = true; 506 507 if ( varlength ) 508 /* We hope the trailing context is 509 * fixed-length. 510 */ 511 varlength = false; 512 else 513 headcnt = rulelen; 514 515 rulelen = 0; 516 517 current_state_type = STATE_TRAILING_CONTEXT; 518 $$ = $1; 519 } 520 ; 521 522series : series singleton 523 { 524 /* This is where concatenation of adjacent patterns 525 * gets done. 526 */ 527 $$ = link_machines( $1, $2 ); 528 } 529 530 | singleton 531 { $$ = $1; } 532 ; 533 534singleton : singleton '*' 535 { 536 varlength = true; 537 538 $$ = mkclos( $1 ); 539 } 540 541 | singleton '+' 542 { 543 varlength = true; 544 $$ = mkposcl( $1 ); 545 } 546 547 | singleton '?' 548 { 549 varlength = true; 550 $$ = mkopt( $1 ); 551 } 552 553 | singleton '{' NUMBER ',' NUMBER '}' 554 { 555 varlength = true; 556 557 if ( $3 > $5 || $3 < 0 ) 558 { 559 synerr( "bad iteration values" ); 560 $$ = $1; 561 } 562 else 563 { 564 if ( $3 == 0 ) 565 { 566 if ( $5 <= 0 ) 567 { 568 synerr( 569 "bad iteration values" ); 570 $$ = $1; 571 } 572 else 573 $$ = mkopt( 574 mkrep( $1, 1, $5 ) ); 575 } 576 else 577 $$ = mkrep( $1, $3, $5 ); 578 } 579 } 580 581 | singleton '{' NUMBER ',' '}' 582 { 583 varlength = true; 584 585 if ( $3 <= 0 ) 586 { 587 synerr( "iteration value must be positive" ); 588 $$ = $1; 589 } 590 591 else 592 $$ = mkrep( $1, $3, INFINITY ); 593 } 594 595 | singleton '{' NUMBER '}' 596 { 597 /* The singleton could be something like "(foo)", 598 * in which case we have no idea what its length 599 * is, so we punt here. 600 */ 601 varlength = true; 602 603 if ( $3 <= 0 ) 604 { 605 synerr( "iteration value must be positive" ); 606 $$ = $1; 607 } 608 609 else 610 $$ = link_machines( $1, 611 copysingl( $1, $3 - 1 ) ); 612 } 613 614 | '.' 615 { 616 if ( ! madeany ) 617 { 618 /* Create the '.' character class. */ 619 anyccl = cclinit(); 620 ccladd( anyccl, '\n' ); 621 cclnegate( anyccl ); 622 623 if ( useecs ) 624 mkeccl( ccltbl + cclmap[anyccl], 625 ccllen[anyccl], nextecm, 626 ecgroup, csize, csize ); 627 628 madeany = true; 629 } 630 631 ++rulelen; 632 633 $$ = mkstate( -anyccl ); 634 } 635 636 | fullccl 637 { 638 if ( ! cclsorted ) 639 /* Sort characters for fast searching. We 640 * use a shell sort since this list could 641 * be large. 642 */ 643 cshell( ccltbl + cclmap[$1], ccllen[$1], true ); 644 645 if ( useecs ) 646 mkeccl( ccltbl + cclmap[$1], ccllen[$1], 647 nextecm, ecgroup, csize, csize ); 648 649 ++rulelen; 650 651 $$ = mkstate( -$1 ); 652 } 653 654 | PREVCCL 655 { 656 ++rulelen; 657 658 $$ = mkstate( -$1 ); 659 } 660 661 | '"' string '"' 662 { $$ = $2; } 663 664 | '(' re ')' 665 { $$ = $2; } 666 667 | CHAR 668 { 669 ++rulelen; 670 671 if ( caseins && $1 >= 'A' && $1 <= 'Z' ) 672 $1 = clower( $1 ); 673 674 $$ = mkstate( $1 ); 675 } 676 ; 677 678fullccl : '[' ccl ']' 679 { $$ = $2; } 680 681 | '[' '^' ccl ']' 682 { 683 cclnegate( $3 ); 684 $$ = $3; 685 } 686 ; 687 688ccl : ccl CHAR '-' CHAR 689 { 690 if ( caseins ) 691 { 692 if ( $2 >= 'A' && $2 <= 'Z' ) 693 $2 = clower( $2 ); 694 if ( $4 >= 'A' && $4 <= 'Z' ) 695 $4 = clower( $4 ); 696 } 697 698 if ( $2 > $4 ) 699 synerr( "negative range in character class" ); 700 701 else 702 { 703 for ( i = $2; i <= $4; ++i ) 704 ccladd( $1, i ); 705 706 /* Keep track if this ccl is staying in 707 * alphabetical order. 708 */ 709 cclsorted = cclsorted && ($2 > lastchar); 710 lastchar = $4; 711 } 712 713 $$ = $1; 714 } 715 716 | ccl CHAR 717 { 718 if ( caseins && $2 >= 'A' && $2 <= 'Z' ) 719 $2 = clower( $2 ); 720 721 ccladd( $1, $2 ); 722 cclsorted = cclsorted && ($2 > lastchar); 723 lastchar = $2; 724 $$ = $1; 725 } 726 727 | ccl ccl_expr 728 { 729 /* Too hard to properly maintain cclsorted. */ 730 cclsorted = false; 731 $$ = $1; 732 } 733 734 | 735 { 736 cclsorted = true; 737 lastchar = 0; 738 currccl = $$ = cclinit(); 739 } 740 ; 741 742ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) } 743 | CCE_ALPHA { CCL_EXPR(isalpha) } 744 | CCE_BLANK { CCL_EXPR(IS_BLANK) } 745 | CCE_CNTRL { CCL_EXPR(iscntrl) } 746 | CCE_DIGIT { CCL_EXPR(isdigit) } 747 | CCE_GRAPH { CCL_EXPR(isgraph) } 748 | CCE_LOWER { CCL_EXPR(islower) } 749 | CCE_PRINT { CCL_EXPR(isprint) } 750 | CCE_PUNCT { CCL_EXPR(ispunct) } 751 | CCE_SPACE { CCL_EXPR(isspace) } 752 | CCE_UPPER { 753 if ( caseins ) 754 CCL_EXPR(islower) 755 else 756 CCL_EXPR(isupper) 757 } 758 | CCE_XDIGIT { CCL_EXPR(isxdigit) } 759 ; 760 761string : string CHAR 762 { 763 if ( caseins && $2 >= 'A' && $2 <= 'Z' ) 764 $2 = clower( $2 ); 765 766 ++rulelen; 767 768 $$ = link_machines( $1, mkstate( $2 ) ); 769 } 770 771 | 772 { $$ = mkstate( SYM_EPSILON ); } 773 ; 774 775%% 776 777 778/* build_eof_action - build the "<<EOF>>" action for the active start 779 * conditions 780 */ 781 782void build_eof_action() 783 { 784 register int i; 785 char action_text[MAXLINE]; 786 787 for ( i = 1; i <= scon_stk_ptr; ++i ) 788 { 789 if ( sceof[scon_stk[i]] ) 790 format_pinpoint_message( 791 "multiple <<EOF>> rules for start condition %s", 792 scname[scon_stk[i]] ); 793 794 else 795 { 796 sceof[scon_stk[i]] = true; 797 sprintf( action_text, "case YY_STATE_EOF(%s):\n", 798 scname[scon_stk[i]] ); 799 add_action( action_text ); 800 } 801 } 802 803 line_directive_out( (FILE *) 0, 1 ); 804 805 /* This isn't a normal rule after all - don't count it as 806 * such, so we don't have any holes in the rule numbering 807 * (which make generating "rule can never match" warnings 808 * more difficult. 809 */ 810 --num_rules; 811 ++num_eof_rules; 812 } 813 814 815/* format_synerr - write out formatted syntax error */ 816 817void format_synerr( msg, arg ) 818char msg[], arg[]; 819 { 820 char errmsg[MAXLINE]; 821 822 (void) sprintf( errmsg, msg, arg ); 823 synerr( errmsg ); 824 } 825 826 827/* synerr - report a syntax error */ 828 829void synerr( str ) 830char str[]; 831 { 832 syntaxerror = true; 833 pinpoint_message( str ); 834 } 835 836 837/* format_warn - write out formatted warning */ 838 839void format_warn( msg, arg ) 840char msg[], arg[]; 841 { 842 char warn_msg[MAXLINE]; 843 844 (void) sprintf( warn_msg, msg, arg ); 845 warn( warn_msg ); 846 } 847 848 849/* warn - report a warning, unless -w was given */ 850 851void warn( str ) 852char str[]; 853 { 854 line_warning( str, linenum ); 855 } 856 857/* format_pinpoint_message - write out a message formatted with one string, 858 * pinpointing its location 859 */ 860 861void format_pinpoint_message( msg, arg ) 862char msg[], arg[]; 863 { 864 char errmsg[MAXLINE]; 865 866 (void) sprintf( errmsg, msg, arg ); 867 pinpoint_message( errmsg ); 868 } 869 870 871/* pinpoint_message - write out a message, pinpointing its location */ 872 873void pinpoint_message( str ) 874char str[]; 875 { 876 line_pinpoint( str, linenum ); 877 } 878 879 880/* line_warning - report a warning at a given line, unless -w was given */ 881 882void line_warning( str, line ) 883char str[]; 884int line; 885 { 886 char warning[MAXLINE]; 887 888 if ( ! nowarn ) 889 { 890 sprintf( warning, "warning, %s", str ); 891 line_pinpoint( warning, line ); 892 } 893 } 894 895 896/* line_pinpoint - write out a message, pinpointing it at the given line */ 897 898void line_pinpoint( str, line ) 899char str[]; 900int line; 901 { 902 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); 903 } 904 905 906/* yyerror - eat up an error message from the parser; 907 * currently, messages are ignore 908 */ 909 910void yyerror( msg ) 911char msg[]; 912 { 913 } 914