1/**************************************************************** 2Copyright (C) Lucent Technologies 1997 3All Rights Reserved 4 5Permission to use, copy, modify, and distribute this software and 6its documentation for any purpose and without fee is hereby 7granted, provided that the above copyright notice appear in all 8copies and that both that the copyright notice and this 9permission notice and warranty disclaimer appear in supporting 10documentation, and that the name Lucent Technologies or any of 11its entities not be used in advertising or publicity pertaining 12to distribution of the software without specific, written prior 13permission. 14 15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22THIS SOFTWARE. 23****************************************************************/ 24 25#include <stdio.h> 26#include <stdlib.h> 27#include <string.h> 28#include <ctype.h> 29#include "awk.h" 30#include "ytab.h" 31 32extern YYSTYPE yylval; 33extern int infunc; 34 35int lineno = 1; 36int bracecnt = 0; 37int brackcnt = 0; 38int parencnt = 0; 39 40typedef struct Keyword { 41 const char *word; 42 int sub; 43 int type; 44} Keyword; 45 46Keyword keywords[] ={ /* keep sorted: binary searched */ 47 { "BEGIN", XBEGIN, XBEGIN }, 48 { "END", XEND, XEND }, 49 { "NF", VARNF, VARNF }, 50 { "atan2", FATAN, BLTIN }, 51 { "break", BREAK, BREAK }, 52 { "close", CLOSE, CLOSE }, 53 { "continue", CONTINUE, CONTINUE }, 54 { "cos", FCOS, BLTIN }, 55 { "delete", DELETE, DELETE }, 56 { "do", DO, DO }, 57 { "else", ELSE, ELSE }, 58 { "exit", EXIT, EXIT }, 59 { "exp", FEXP, BLTIN }, 60 { "fflush", FFLUSH, BLTIN }, 61 { "for", FOR, FOR }, 62 { "func", FUNC, FUNC }, 63 { "function", FUNC, FUNC }, 64 { "getline", GETLINE, GETLINE }, 65 { "gsub", GSUB, GSUB }, 66 { "if", IF, IF }, 67 { "in", IN, IN }, 68 { "index", INDEX, INDEX }, 69 { "int", FINT, BLTIN }, 70 { "length", FLENGTH, BLTIN }, 71 { "log", FLOG, BLTIN }, 72 { "match", MATCHFCN, MATCHFCN }, 73 { "next", NEXT, NEXT }, 74 { "nextfile", NEXTFILE, NEXTFILE }, 75 { "print", PRINT, PRINT }, 76 { "printf", PRINTF, PRINTF }, 77 { "rand", FRAND, BLTIN }, 78 { "return", RETURN, RETURN }, 79 { "sin", FSIN, BLTIN }, 80 { "split", SPLIT, SPLIT }, 81 { "sprintf", SPRINTF, SPRINTF }, 82 { "sqrt", FSQRT, BLTIN }, 83 { "srand", FSRAND, BLTIN }, 84 { "sub", SUB, SUB }, 85 { "substr", SUBSTR, SUBSTR }, 86 { "system", FSYSTEM, BLTIN }, 87 { "tolower", FTOLOWER, BLTIN }, 88 { "toupper", FTOUPPER, BLTIN }, 89 { "while", WHILE, WHILE }, 90}; 91 92#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } 93 94int peek(void) 95{ 96 int c = input(); 97 unput(c); 98 return c; 99} 100 101int gettok(char **pbuf, int *psz) /* get next input token */ 102{ 103 int c, retc; 104 char *buf = *pbuf; 105 int sz = *psz; 106 char *bp = buf; 107 108 c = input(); 109 if (c == 0) 110 return 0; 111 buf[0] = c; 112 buf[1] = 0; 113 if (!isalnum(c) && c != '.' && c != '_') 114 return c; 115 116 *bp++ = c; 117 if (isalpha(c) || c == '_') { /* it's a varname */ 118 for ( ; (c = input()) != 0; ) { 119 if (bp-buf >= sz) 120 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 121 FATAL( "out of space for name %.10s...", buf ); 122 if (isalnum(c) || c == '_') 123 *bp++ = c; 124 else { 125 *bp = 0; 126 unput(c); 127 break; 128 } 129 } 130 *bp = 0; 131 retc = 'a'; /* alphanumeric */ 132 } else { /* maybe it's a number, but could be . */ 133 char *rem; 134 /* read input until can't be a number */ 135 for ( ; (c = input()) != 0; ) { 136 if (bp-buf >= sz) 137 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 138 FATAL( "out of space for number %.10s...", buf ); 139 if (isdigit(c) || c == 'e' || c == 'E' 140 || c == '.' || c == '+' || c == '-') 141 *bp++ = c; 142 else { 143 unput(c); 144 break; 145 } 146 } 147 *bp = 0; 148 strtod(buf, &rem); /* parse the number */ 149 if (rem == buf) { /* it wasn't a valid number at all */ 150 buf[1] = 0; /* return one character as token */ 151 retc = buf[0]; /* character is its own type */ 152 unputstr(rem+1); /* put rest back for later */ 153 } else { /* some prefix was a number */ 154 unputstr(rem); /* put rest back for later */ 155 rem[0] = 0; /* truncate buf after number part */ 156 retc = '0'; /* type is number */ 157 } 158 } 159 *pbuf = buf; 160 *psz = sz; 161 return retc; 162} 163 164int word(char *); 165int string(void); 166int regexpr(void); 167int sc = 0; /* 1 => return a } right now */ 168int reg = 0; /* 1 => return a REGEXPR now */ 169 170int yylex(void) 171{ 172 int c; 173 static char *buf = 0; 174 static int bufsize = 5; /* BUG: setting this small causes core dump! */ 175 176 if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) 177 FATAL( "out of space in yylex" ); 178 if (sc) { 179 sc = 0; 180 RET('}'); 181 } 182 if (reg) { 183 reg = 0; 184 return regexpr(); 185 } 186 for (;;) { 187 c = gettok(&buf, &bufsize); 188 if (c == 0) 189 return 0; 190 if (isalpha(c) || c == '_') 191 return word(buf); 192 if (isdigit(c)) { 193 yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); 194 /* should this also have STR set? */ 195 RET(NUMBER); 196 } 197 198 yylval.i = c; 199 switch (c) { 200 case '\n': /* {EOL} */ 201 RET(NL); 202 case '\r': /* assume \n is coming */ 203 case ' ': /* {WS}+ */ 204 case '\t': 205 break; 206 case '#': /* #.* strip comments */ 207 while ((c = input()) != '\n' && c != 0) 208 ; 209 unput(c); 210 break; 211 case ';': 212 RET(';'); 213 case '\\': 214 if (peek() == '\n') { 215 input(); 216 } else if (peek() == '\r') { 217 input(); input(); /* \n */ 218 lineno++; 219 } else { 220 RET(c); 221 } 222 break; 223 case '&': 224 if (peek() == '&') { 225 input(); RET(AND); 226 } else 227 RET('&'); 228 case '|': 229 if (peek() == '|') { 230 input(); RET(BOR); 231 } else 232 RET('|'); 233 case '!': 234 if (peek() == '=') { 235 input(); yylval.i = NE; RET(NE); 236 } else if (peek() == '~') { 237 input(); yylval.i = NOTMATCH; RET(MATCHOP); 238 } else 239 RET(NOT); 240 case '~': 241 yylval.i = MATCH; 242 RET(MATCHOP); 243 case '<': 244 if (peek() == '=') { 245 input(); yylval.i = LE; RET(LE); 246 } else { 247 yylval.i = LT; RET(LT); 248 } 249 case '=': 250 if (peek() == '=') { 251 input(); yylval.i = EQ; RET(EQ); 252 } else { 253 yylval.i = ASSIGN; RET(ASGNOP); 254 } 255 case '>': 256 if (peek() == '=') { 257 input(); yylval.i = GE; RET(GE); 258 } else if (peek() == '>') { 259 input(); yylval.i = APPEND; RET(APPEND); 260 } else { 261 yylval.i = GT; RET(GT); 262 } 263 case '+': 264 if (peek() == '+') { 265 input(); yylval.i = INCR; RET(INCR); 266 } else if (peek() == '=') { 267 input(); yylval.i = ADDEQ; RET(ASGNOP); 268 } else 269 RET('+'); 270 case '-': 271 if (peek() == '-') { 272 input(); yylval.i = DECR; RET(DECR); 273 } else if (peek() == '=') { 274 input(); yylval.i = SUBEQ; RET(ASGNOP); 275 } else 276 RET('-'); 277 case '*': 278 if (peek() == '=') { /* *= */ 279 input(); yylval.i = MULTEQ; RET(ASGNOP); 280 } else if (peek() == '*') { /* ** or **= */ 281 input(); /* eat 2nd * */ 282 if (peek() == '=') { 283 input(); yylval.i = POWEQ; RET(ASGNOP); 284 } else { 285 RET(POWER); 286 } 287 } else 288 RET('*'); 289 case '/': 290 RET('/'); 291 case '%': 292 if (peek() == '=') { 293 input(); yylval.i = MODEQ; RET(ASGNOP); 294 } else 295 RET('%'); 296 case '^': 297 if (peek() == '=') { 298 input(); yylval.i = POWEQ; RET(ASGNOP); 299 } else 300 RET(POWER); 301 302 case '$': 303 /* BUG: awkward, if not wrong */ 304 c = gettok(&buf, &bufsize); 305 if (isalpha(c)) { 306 if (strcmp(buf, "NF") == 0) { /* very special */ 307 unputstr("(NF)"); 308 RET(INDIRECT); 309 } 310 c = peek(); 311 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { 312 unputstr(buf); 313 RET(INDIRECT); 314 } 315 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); 316 RET(IVAR); 317 } else if (c == 0) { /* */ 318 SYNTAX( "unexpected end of input after $" ); 319 RET(';'); 320 } else { 321 unputstr(buf); 322 RET(INDIRECT); 323 } 324 325 case '}': 326 if (--bracecnt < 0) 327 SYNTAX( "extra }" ); 328 sc = 1; 329 RET(';'); 330 case ']': 331 if (--brackcnt < 0) 332 SYNTAX( "extra ]" ); 333 RET(']'); 334 case ')': 335 if (--parencnt < 0) 336 SYNTAX( "extra )" ); 337 RET(')'); 338 case '{': 339 bracecnt++; 340 RET('{'); 341 case '[': 342 brackcnt++; 343 RET('['); 344 case '(': 345 parencnt++; 346 RET('('); 347 348 case '"': 349 return string(); /* BUG: should be like tran.c ? */ 350 351 default: 352 RET(c); 353 } 354 } 355} 356 357int string(void) 358{ 359 int c, n; 360 char *s, *bp; 361 static char *buf = 0; 362 static int bufsz = 500; 363 364 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 365 FATAL("out of space for strings"); 366 for (bp = buf; (c = input()) != '"'; ) { 367 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) 368 FATAL("out of space for string %.10s...", buf); 369 switch (c) { 370 case '\n': 371 case '\r': 372 case 0: 373 SYNTAX( "non-terminated string %.10s...", buf ); 374 lineno++; 375 if (c == 0) /* hopeless */ 376 FATAL( "giving up" ); 377 break; 378 case '\\': 379 c = input(); 380 switch (c) { 381 case '"': *bp++ = '"'; break; 382 case 'n': *bp++ = '\n'; break; 383 case 't': *bp++ = '\t'; break; 384 case 'f': *bp++ = '\f'; break; 385 case 'r': *bp++ = '\r'; break; 386 case 'b': *bp++ = '\b'; break; 387 case 'v': *bp++ = '\v'; break; 388 case 'a': *bp++ = '\007'; break; 389 case '\\': *bp++ = '\\'; break; 390 391 case '0': case '1': case '2': /* octal: \d \dd \ddd */ 392 case '3': case '4': case '5': case '6': case '7': 393 n = c - '0'; 394 if ((c = peek()) >= '0' && c < '8') { 395 n = 8 * n + input() - '0'; 396 if ((c = peek()) >= '0' && c < '8') 397 n = 8 * n + input() - '0'; 398 } 399 *bp++ = n; 400 break; 401 402 case 'x': /* hex \x0-9a-fA-F + */ 403 { char xbuf[100], *px; 404 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) { 405 if (isdigit(c) 406 || (c >= 'a' && c <= 'f') 407 || (c >= 'A' && c <= 'F')) 408 *px++ = c; 409 else 410 break; 411 } 412 *px = 0; 413 unput(c); 414 sscanf(xbuf, "%x", &n); 415 *bp++ = n; 416 break; 417 } 418 419 default: 420 *bp++ = c; 421 break; 422 } 423 break; 424 default: 425 *bp++ = c; 426 break; 427 } 428 } 429 *bp = 0; 430 s = tostring(buf); 431 *bp++ = ' '; *bp++ = 0; 432 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); 433 RET(STRING); 434} 435 436 437int binsearch(char *w, Keyword *kp, int n) 438{ 439 int cond, low, mid, high; 440 441 low = 0; 442 high = n - 1; 443 while (low <= high) { 444 mid = (low + high) / 2; 445 if ((cond = strcmp(w, kp[mid].word)) < 0) 446 high = mid - 1; 447 else if (cond > 0) 448 low = mid + 1; 449 else 450 return mid; 451 } 452 return -1; 453} 454 455int word(char *w) 456{ 457 Keyword *kp; 458 int c, n; 459 460 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); 461/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ 462 kp = keywords + n; 463 if (n != -1) { /* found in table */ 464 yylval.i = kp->sub; 465 switch (kp->type) { /* special handling */ 466 case BLTIN: 467 if (kp->sub == FSYSTEM && safe) 468 SYNTAX( "system is unsafe" ); 469 RET(kp->type); 470 case FUNC: 471 if (infunc) 472 SYNTAX( "illegal nested function" ); 473 RET(kp->type); 474 case RETURN: 475 if (!infunc) 476 SYNTAX( "return not in function" ); 477 RET(kp->type); 478 case VARNF: 479 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); 480 RET(VARNF); 481 default: 482 RET(kp->type); 483 } 484 } 485 c = peek(); /* look for '(' */ 486 if (c != '(' && infunc && (n=isarg(w)) >= 0) { 487 yylval.i = n; 488 RET(ARG); 489 } else { 490 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); 491 if (c == '(') { 492 RET(CALL); 493 } else { 494 RET(VAR); 495 } 496 } 497} 498 499void startreg(void) /* next call to yylex will return a regular expression */ 500{ 501 reg = 1; 502} 503 504int regexpr(void) 505{ 506 int c; 507 static char *buf = 0; 508 static int bufsz = 500; 509 char *bp; 510 511 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 512 FATAL("out of space for rex expr"); 513 bp = buf; 514 for ( ; (c = input()) != '/' && c != 0; ) { 515 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) 516 FATAL("out of space for reg expr %.10s...", buf); 517 if (c == '\n') { 518 SYNTAX( "newline in regular expression %.10s...", buf ); 519 unput('\n'); 520 break; 521 } else if (c == '\\') { 522 *bp++ = '\\'; 523 *bp++ = input(); 524 } else { 525 *bp++ = c; 526 } 527 } 528 *bp = 0; 529 if (c == 0) 530 SYNTAX("non-terminated regular expression %.10s...", buf); 531 yylval.s = tostring(buf); 532 unput('/'); 533 RET(REGEXPR); 534} 535 536/* low-level lexical stuff, sort of inherited from lex */ 537 538char ebuf[300]; 539char *ep = ebuf; 540char yysbuf[100]; /* pushback buffer */ 541char *yysptr = yysbuf; 542FILE *yyin = 0; 543 544int input(void) /* get next lexical input character */ 545{ 546 int c; 547 extern char *lexprog; 548 549 if (yysptr > yysbuf) 550 c = (uschar)*--yysptr; 551 else if (lexprog != NULL) { /* awk '...' */ 552 if ((c = (uschar)*lexprog) != 0) 553 lexprog++; 554 } else /* awk -f ... */ 555 c = pgetc(); 556 if (c == '\n') 557 lineno++; 558 else if (c == EOF) 559 c = 0; 560 if (ep >= ebuf + sizeof ebuf) 561 ep = ebuf; 562 return *ep++ = c; 563} 564 565void unput(int c) /* put lexical character back on input */ 566{ 567 if (c == '\n') 568 lineno--; 569 if (yysptr >= yysbuf + sizeof(yysbuf)) 570 FATAL("pushed back too much: %.20s...", yysbuf); 571 *yysptr++ = c; 572 if (--ep < ebuf) 573 ep = ebuf + sizeof(ebuf) - 1; 574} 575 576void unputstr(const char *s) /* put a string back on input */ 577{ 578 int i; 579 580 for (i = strlen(s)-1; i >= 0; i--) 581 unput(s[i]); 582} 583