/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

/* Reports a syntax error if item's name already appears in list. */
void checkdup(Node *list, Cell *item);

/* Always returns 1. */
int yywrap(void)
{
	return 1;
}

/* Parser state shared with the rule actions below. */
Node	*beginloc = NULL;	/* statements linked in from BEGIN blocks */
Node	*endloc = NULL;		/* statements linked in from END blocks */
int	infunc	= 0;		/* = 1 if in arglist or body of func */
int	inloop	= 0;		/* = 1 if in while, for, do */
char	*curfname = NULL;	/* current function name */
Node	*arglist = NULL;	/* list of args for current function */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;	/* parse-tree node */
	Cell	*cp;	/* cell (VAR, NUMBER, STRING, ...) */
	int	i;	/* integer token value */
	char	*s;	/* text of a regular expression */
}

/* Token declarations: keep the order intact (see FIRSTTOKEN / LASTTOKEN). */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print

/* Operator precedence, lowest first. */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/* Whole program.  On an error-free parse, winner receives the PROGRAM
 * node built from the BEGIN list, the pattern-action list and the END list. */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/* The next few nonterminals are a token followed by any number of newlines. */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/* for (init; cond; step) stmt, for (init; ; step) stmt, for (v in arr) stmt.
 * inloop is raised by a mid-rule action while the body is parsed. */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

/* Name in a function definition; setfname() checks and records it. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

/* "if (cond)" header; the condition is passed through notnull(). */
if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

lbrace:
	  '{' | lbrace NL
	;

/* One or more newlines. */
nl:
	  NL | nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

/* Pattern-action statements, optionally surrounded by separators. */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

/* One top-level item: pattern, pattern { action }, range pattern,
 * bare { action }, BEGIN / END block, or a function definition.
 * A missing action becomes a PRINT of rectonode(). */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

/* Comma-separated list of one or more patterns. */
patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/* Same shape as pattern, but without the relational operators and
 * without "| getline"; used for unparenthesised print arguments
 * (see pplist / prarg). */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* General expression.  For MATCHOP with a constant right operand the
 * regular expression is compiled here with makedfa(); otherwise the
 * first operand of the node is (Node *)1 instead of NIL. */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* Comma-separated list of two or more patterns. */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; with none given, rectonode() is used. */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

/* One or more statement separators (newline or ';'). */
pst:
	  NL | ';' | pst NL | pst ';'
	;

rbrace:
	  '}' | rbrace NL
	;

/* A bare /regexp/ is a MATCH against rectonode(). */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/* startreg() is called right after the opening '/'. */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

rparen:
	  ')' | rparen NL
	;

/* print/printf with optional redirection, delete, or an expression.
 * When safe is set, the redirected forms report an error instead of
 * building a node. */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator. */
st:
	  nl
	| ';' opt_nl
	;

/* A statement.  break/continue are rejected when inloop is zero,
 * next/nextfile when infunc is nonzero. */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
				  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

/* Arithmetic, built-in calls and primaries. */
term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS
		{ /* Unary plus must yield a numeric value (POSIX), so it
		   * cannot simply pass $2 through: negate twice instead. */
		  $$ = op1(UMINUS, op1(UMINUS, $2)); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| STRING			{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* Variable reference: plain name, array element, or INDIRECT form. */
var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term			{ $$ = op1(INDIRECT, $2); }
	;

/* Parameter list of a function definition; arglist tracks the list
 * built so far and checkdup() rejects repeated names. */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;


/* "while (cond)" header; the condition is passed through notnull(). */
while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * Record p's name as the function currently being defined.
 * Reports a syntax error if p is already an array or a function;
 * curfname is set in either case.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/*
 * Return nonzero when p satisfies isvalue() and the cell stored in
 * p->narg[0] has csub == CCON, i.e. the node wraps a constant.
 */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the sval of the cell stored in p->narg[0]. */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Return a node usable as a condition: comparison and logical nodes
 * (LE LT EQ NE GT GE BOR AND NOT) are returned unchanged, anything
 * else is wrapped as "n NE nullnode".
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

/*
 * Check whether cp's name already occurs in the list vl (linked
 * through nnext); report a syntax error on the first match.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}