1/*	$OpenBSD: syn.c,v 1.30 2015/09/01 13:12:31 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
5 *		 2011, 2012, 2013, 2014, 2015, 2016
6 *	mirabilos <m@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24#include "sh.h"
25
26__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.109 2016/01/19 23:12:15 tg Exp $");
27
/*
 * Describes the compound construct the parser is currently inside;
 * syntaxerr() uses it to report an "unmatched" opener at EOF.
 */
struct nesting_state {
	int start_token;	/* token that began nesting (eg, FOR) */
	int start_line;		/* line nesting began on */
};
32
/*
 * Parser state saved by yyrecursive() and restored by yyrecursive_pop();
 * entries are chained through e->yyrecursive_statep.
 */
struct yyrecursive_state {
	/* entry that was on top before this one was pushed */
	struct yyrecursive_state *next;
	/* saved value of herep */
	struct ioword **old_herep;
	/* saved value of symbol */
	int old_symbol;
	/* saved value of sALIAS */
	int old_salias;
	/* saved value of subshell_nesting_type */
	int old_nesting_type;
	/* saved value of reject */
	bool old_reject;
};
41
/* grammar productions (recursive descent) */
static void yyparse(void);
static struct op *pipeline(int);
static struct op *andor(void);
static struct op *c_list(bool);
static struct ioword *synio(int);
static struct op *nested(int, int, int);
static struct op *get_command(int);
static struct op *dogroup(void);
static struct op *thenpart(void);
static struct op *elsepart(void);
static struct op *caselist(void);
static struct op *casepart(int);
static struct op *function_body(char *, bool);
static char **wordlist(void);
/* tree construction, error reporting and nesting bookkeeping */
static struct op *block(int, struct op *, struct op *);
static struct op *newtp(int);
static void syntaxerr(const char *) MKSH_A_NORETURN;
static void nesting_push(struct nesting_state *, int);
static void nesting_pop(struct nesting_state *);
static int inalias(struct source *) MKSH_A_PURE;
/* Test_env callbacks used while parsing [[ ... ]] */
static Test_op dbtestp_isa(Test_env *, Test_meta);
static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
static int dbtestp_eval(Test_env *, Test_op, const char *,
    const char *, bool);
static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
67
static struct op *outtree;		/* yyparse output */
static struct nesting_state nesting;	/* construct currently being parsed */

static bool reject;			/* token(cf) gets symbol again */
static int symbol;			/* yylex value */
static int sALIAS = ALIAS;		/* 0 in yyrecursive */

/* one-token pushback: REJECT marks symbol as unread, ACCEPT consumes it */
#define REJECT		(reject = true)
#define ACCEPT		(reject = false)
/* return the pushed-back symbol (clearing reject), else lex a new one */
#define token(cf)	((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
/* like token(), but the returned symbol stays pushed back */
#define tpeek(cf)	((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
/* raise a syntax error unless the next token is c */
#define musthave(c,cf)	do { if (token(cf) != (c)) syntaxerr(NULL); } while (/* CONSTCOND */ 0)

/* spellings shared between casepart() and tokentab */
static const char Tcbrace[] = "}";
static const char Tesac[] = "esac";
83
84static void
85yyparse(void)
86{
87	int c;
88
89	ACCEPT;
90
91	outtree = c_list(source->type == SSTRING);
92	c = tpeek(0);
93	if (c == 0 && !outtree)
94		outtree = newtp(TEOF);
95	else if (c != '\n' && c != 0)
96		syntaxerr(NULL);
97}
98
99static struct op *
100pipeline(int cf)
101{
102	struct op *t, *p, *tl = NULL;
103
104	t = get_command(cf);
105	if (t != NULL) {
106		while (token(0) == '|') {
107			if ((p = get_command(CONTIN)) == NULL)
108				syntaxerr(NULL);
109			if (tl == NULL)
110				t = tl = block(TPIPE, t, p);
111			else
112				tl = tl->right = block(TPIPE, tl->right, p);
113		}
114		REJECT;
115	}
116	return (t);
117}
118
119static struct op *
120andor(void)
121{
122	struct op *t, *p;
123	int c;
124
125	t = pipeline(0);
126	if (t != NULL) {
127		while ((c = token(0)) == LOGAND || c == LOGOR) {
128			if ((p = pipeline(CONTIN)) == NULL)
129				syntaxerr(NULL);
130			t = block(c == LOGAND? TAND: TOR, t, p);
131		}
132		REJECT;
133	}
134	return (t);
135}
136
137static struct op *
138c_list(bool multi)
139{
140	struct op *t = NULL, *p, *tl = NULL;
141	int c;
142	bool have_sep;
143
144	while (/* CONSTCOND */ 1) {
145		p = andor();
146		/*
147		 * Token has always been read/rejected at this point, so
148		 * we don't worry about what flags to pass token()
149		 */
150		c = token(0);
151		have_sep = true;
152		if (c == '\n' && (multi || inalias(source))) {
153			if (!p)
154				/* ignore blank lines */
155				continue;
156		} else if (!p)
157			break;
158		else if (c == '&' || c == COPROC)
159			p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
160		else if (c != ';')
161			have_sep = false;
162		if (!t)
163			t = p;
164		else if (!tl)
165			t = tl = block(TLIST, t, p);
166		else
167			tl = tl->right = block(TLIST, tl->right, p);
168		if (!have_sep)
169			break;
170	}
171	REJECT;
172	return (t);
173}
174
/* delimiter word stored when a here document operator is followed by newline */
static const char IONDELIM_delim[] = { CHAR, '<', CHAR, '<', EOS };

/*
 * Parse one I/O redirection. Returns NULL (leaving the token pushed
 * back) if the next token is not a REDIR, otherwise the ioword filled
 * in with its target word or here document delimiter.
 *
 * A redirection flagged IOBASH yields two iowords: the second one is
 * parked in the static nextiop and handed out by the following call.
 */
static struct ioword *
synio(int cf)
{
	struct ioword *iop;
	/* second half of an IOBASH redirection, pending delivery */
	static struct ioword *nextiop;
	bool ishere;

	if (nextiop != NULL) {
		iop = nextiop;
		nextiop = NULL;
		return (iop);
	}

	if (tpeek(cf) != REDIR)
		return (NULL);
	ACCEPT;
	iop = yylval.iop;
	ishere = (iop->ioflag & IOTYPE) == IOHERE;
	if (iop->ioflag & IOHERESTR) {
		musthave(LWORD, 0);
	} else if (ishere && tpeek(HEREDELIM) == '\n') {
		/* no delimiter word given: substitute IONDELIM_delim */
		ACCEPT;
		yylval.cp = wdcopy(IONDELIM_delim, ATEMP);
		iop->ioflag |= IOEVAL | IONDELIM;
	} else
		musthave(LWORD, ishere ? HEREDELIM : 0);
	if (ishere) {
		iop->delim = yylval.cp;
		if (*ident != 0 && !(iop->ioflag & IOHERESTR)) {
			/* unquoted */
			iop->ioflag |= IOEVAL;
		}
		/* register with the pending here documents, bounded by HERES */
		if (herep > &heres[HERES - 1])
			yyerror("too many %ss\n", "<<");
		*herep++ = iop;
	} else
		iop->ioname = yylval.cp;

	if (iop->ioflag & IOBASH) {
		char *cp;

		/*
		 * Queue an extra IODUP for unit 2 whose name is the
		 * decimal spelling of this redirection's unit.
		 */
		nextiop = alloc(sizeof(*iop), ATEMP);
#ifdef MKSH_CONSERVATIVE_FDS
		nextiop->ioname = cp = alloc(3, ATEMP);
#else
		nextiop->ioname = cp = alloc(5, ATEMP);

		if (iop->unit > 9) {
			*cp++ = CHAR;
			*cp++ = digits_lc[iop->unit / 10];
		}
#endif
		*cp++ = CHAR;
		*cp++ = digits_lc[iop->unit % 10];
		*cp = EOS;

		iop->ioflag &= ~IOBASH;
		nextiop->unit = 2;
		nextiop->ioflag = IODUP;
		nextiop->delim = NULL;
		nextiop->heredoc = NULL;
	}
	return (iop);
}
241
242static struct op *
243nested(int type, int smark, int emark)
244{
245	struct op *t;
246	struct nesting_state old_nesting;
247
248	nesting_push(&old_nesting, smark);
249	t = c_list(true);
250	musthave(emark, KEYWORD|sALIAS);
251	nesting_pop(&old_nesting);
252	return (block(type, t, NULL));
253}
254
/* words get_command() prepends when rewriting (( expr )) */
static const char let_cmd[] = {
	QCHAR, 'l', CHAR, 'e', CHAR, 't', CHAR, ']', EOS
};
/* words get_command() uses to rewrite foo=(bar ...) as: set -A foo -- bar ... */
static const char setA_cmd0[] = {
	QCHAR, 's', CHAR, 'e', CHAR, 't', EOS
};
static const char setA_cmd1[] = {
	CHAR, '-', CHAR, 'A', EOS
};
static const char setA_cmd2[] = {
	CHAR, '-', CHAR, '-', EOS
};
267
268static struct op *
269get_command(int cf)
270{
271	struct op *t;
272	int c, iopn = 0, syniocf, lno;
273	struct ioword *iop, **iops;
274	XPtrV args, vars;
275	struct nesting_state old_nesting;
276
277	/* NUFILE is small enough to leave this addition unchecked */
278	iops = alloc2((NUFILE + 1), sizeof(struct ioword *), ATEMP);
279	XPinit(args, 16);
280	XPinit(vars, 16);
281
282	syniocf = KEYWORD|sALIAS;
283	switch (c = token(cf|KEYWORD|sALIAS|CMDASN)) {
284	default:
285		REJECT;
286		afree(iops, ATEMP);
287		XPfree(args);
288		XPfree(vars);
289		/* empty line */
290		return (NULL);
291
292	case LWORD:
293	case REDIR:
294		REJECT;
295		syniocf &= ~(KEYWORD|sALIAS);
296		t = newtp(TCOM);
297		t->lineno = source->line;
298		goto get_command_begin;
299		while (/* CONSTCOND */ 1) {
300			bool check_assign_cmd;
301
302			if (XPsize(args) == 0) {
303 get_command_begin:
304				check_assign_cmd = true;
305				cf = sALIAS | CMDASN;
306			} else if (t->u.evalflags)
307				cf = CMDWORD | CMDASN;
308			else
309				cf = CMDWORD;
310			switch (tpeek(cf)) {
311			case REDIR:
312				while ((iop = synio(cf)) != NULL) {
313					if (iopn >= NUFILE)
314						yyerror("too many %ss\n",
315						    "redirection");
316					iops[iopn++] = iop;
317				}
318				break;
319
320			case LWORD:
321				ACCEPT;
322				/*
323				 * the iopn == 0 and XPsize(vars) == 0 are
324				 * dubious but AT&T ksh acts this way
325				 */
326				if (iopn == 0 && XPsize(vars) == 0 &&
327				    check_assign_cmd) {
328					if (assign_command(ident, false))
329						t->u.evalflags = DOVACHECK;
330					else if (strcmp(ident, Tcommand) != 0)
331						check_assign_cmd = false;
332				}
333				if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
334				    is_wdvarassign(yylval.cp))
335					XPput(vars, yylval.cp);
336				else
337					XPput(args, yylval.cp);
338				break;
339
340			case '(' /*)*/:
341				if (XPsize(args) == 0 && XPsize(vars) == 1 &&
342				    is_wdvarassign(yylval.cp)) {
343					char *tcp;
344
345					/* wdarrassign: foo=(bar) */
346					ACCEPT;
347
348					/* manipulate the vars string */
349					tcp = XPptrv(vars)[(vars.len = 0)];
350					/* 'varname=' -> 'varname' */
351					tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;
352
353					/* construct new args strings */
354					XPput(args, wdcopy(setA_cmd0, ATEMP));
355					XPput(args, wdcopy(setA_cmd1, ATEMP));
356					XPput(args, tcp);
357					XPput(args, wdcopy(setA_cmd2, ATEMP));
358
359					/* slurp in words till closing paren */
360					while (token(CONTIN) == LWORD)
361						XPput(args, yylval.cp);
362					if (symbol != /*(*/ ')')
363						syntaxerr(NULL);
364				} else {
365					/*
366					 * Check for "> foo (echo hi)"
367					 * which AT&T ksh allows (not
368					 * POSIX, but not disallowed)
369					 */
370					afree(t, ATEMP);
371					if (XPsize(args) == 0 &&
372					    XPsize(vars) == 0) {
373						ACCEPT;
374						goto Subshell;
375					}
376
377					/* must be a function */
378					if (iopn != 0 || XPsize(args) != 1 ||
379					    XPsize(vars) != 0)
380						syntaxerr(NULL);
381					ACCEPT;
382					musthave(/*(*/')', 0);
383					t = function_body(XPptrv(args)[0], false);
384				}
385				goto Leave;
386
387			default:
388				goto Leave;
389			}
390		}
391 Leave:
392		break;
393
394	case '(': /*)*/ {
395		int subshell_nesting_type_saved;
396 Subshell:
397		subshell_nesting_type_saved = subshell_nesting_type;
398		subshell_nesting_type = ')';
399		t = nested(TPAREN, '(', ')');
400		subshell_nesting_type = subshell_nesting_type_saved;
401		break;
402	    }
403
404	case '{': /*}*/
405		t = nested(TBRACE, '{', '}');
406		break;
407
408	case MDPAREN:
409		/* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
410		lno = source->line;
411		ACCEPT;
412		switch (token(LETEXPR)) {
413		case LWORD:
414			break;
415		case '(': /*)*/
416			c = '(';
417			goto Subshell;
418		default:
419			syntaxerr(NULL);
420		}
421		t = newtp(TCOM);
422		t->lineno = lno;
423		XPput(args, wdcopy(let_cmd, ATEMP));
424		XPput(args, yylval.cp);
425		break;
426
427	case DBRACKET: /* [[ .. ]] */
428		/* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
429		t = newtp(TDBRACKET);
430		ACCEPT;
431		{
432			Test_env te;
433
434			te.flags = TEF_DBRACKET;
435			te.pos.av = &args;
436			te.isa = dbtestp_isa;
437			te.getopnd = dbtestp_getopnd;
438			te.eval = dbtestp_eval;
439			te.error = dbtestp_error;
440
441			test_parse(&te);
442		}
443		break;
444
445	case FOR:
446	case SELECT:
447		t = newtp((c == FOR) ? TFOR : TSELECT);
448		musthave(LWORD, CMDASN);
449		if (!is_wdvarname(yylval.cp, true))
450			yyerror("%s: %s\n", c == FOR ? "for" : Tselect,
451			    "bad identifier");
452		strdupx(t->str, ident, ATEMP);
453		nesting_push(&old_nesting, c);
454		t->vars = wordlist();
455		t->left = dogroup();
456		nesting_pop(&old_nesting);
457		break;
458
459	case WHILE:
460	case UNTIL:
461		nesting_push(&old_nesting, c);
462		t = newtp((c == WHILE) ? TWHILE : TUNTIL);
463		t->left = c_list(true);
464		t->right = dogroup();
465		nesting_pop(&old_nesting);
466		break;
467
468	case CASE:
469		t = newtp(TCASE);
470		musthave(LWORD, 0);
471		t->str = yylval.cp;
472		nesting_push(&old_nesting, c);
473		t->left = caselist();
474		nesting_pop(&old_nesting);
475		break;
476
477	case IF:
478		nesting_push(&old_nesting, c);
479		t = newtp(TIF);
480		t->left = c_list(true);
481		t->right = thenpart();
482		musthave(FI, KEYWORD|sALIAS);
483		nesting_pop(&old_nesting);
484		break;
485
486	case BANG:
487		syniocf &= ~(KEYWORD|sALIAS);
488		t = pipeline(0);
489		if (t == NULL)
490			syntaxerr(NULL);
491		t = block(TBANG, NULL, t);
492		break;
493
494	case TIME:
495		syniocf &= ~(KEYWORD|sALIAS);
496		t = pipeline(0);
497		if (t && t->type == TCOM) {
498			t->str = alloc(2, ATEMP);
499			/* TF_* flags */
500			t->str[0] = '\0';
501			t->str[1] = '\0';
502		}
503		t = block(TTIME, t, NULL);
504		break;
505
506	case FUNCTION:
507		musthave(LWORD, 0);
508		t = function_body(yylval.cp, true);
509		break;
510	}
511
512	while ((iop = synio(syniocf)) != NULL) {
513		if (iopn >= NUFILE)
514			yyerror("too many %ss\n", "redirection");
515		iops[iopn++] = iop;
516	}
517
518	if (iopn == 0) {
519		afree(iops, ATEMP);
520		t->ioact = NULL;
521	} else {
522		iops[iopn++] = NULL;
523		iops = aresize2(iops, iopn, sizeof(struct ioword *), ATEMP);
524		t->ioact = iops;
525	}
526
527	if (t->type == TCOM || t->type == TDBRACKET) {
528		XPput(args, NULL);
529		t->args = (const char **)XPclose(args);
530		XPput(vars, NULL);
531		t->vars = (char **)XPclose(vars);
532	} else {
533		XPfree(args);
534		XPfree(vars);
535	}
536
537	if (c == MDPAREN) {
538		t = block(TBRACE, t, NULL);
539		t->ioact = t->left->ioact;
540		t->left->ioact = NULL;
541	}
542
543	return (t);
544}
545
546static struct op *
547dogroup(void)
548{
549	int c;
550	struct op *list;
551
552	c = token(CONTIN|KEYWORD|sALIAS);
553	/*
554	 * A {...} can be used instead of do...done for for/select loops
555	 * but not for while/until loops - we don't need to check if it
556	 * is a while loop because it would have been parsed as part of
557	 * the conditional command list...
558	 */
559	if (c == DO)
560		c = DONE;
561	else if (c == '{')
562		c = '}';
563	else
564		syntaxerr(NULL);
565	list = c_list(true);
566	musthave(c, KEYWORD|sALIAS);
567	return (list);
568}
569
570static struct op *
571thenpart(void)
572{
573	struct op *t;
574
575	musthave(THEN, KEYWORD|sALIAS);
576	t = newtp(0);
577	t->left = c_list(true);
578	if (t->left == NULL)
579		syntaxerr(NULL);
580	t->right = elsepart();
581	return (t);
582}
583
584static struct op *
585elsepart(void)
586{
587	struct op *t;
588
589	switch (token(KEYWORD|sALIAS|CMDASN)) {
590	case ELSE:
591		if ((t = c_list(true)) == NULL)
592			syntaxerr(NULL);
593		return (t);
594
595	case ELIF:
596		t = newtp(TELIF);
597		t->left = c_list(true);
598		t->right = thenpart();
599		return (t);
600
601	default:
602		REJECT;
603	}
604	return (NULL);
605}
606
607static struct op *
608caselist(void)
609{
610	struct op *t, *tl;
611	int c;
612
613	c = token(CONTIN|KEYWORD|sALIAS);
614	/* A {...} can be used instead of in...esac for case statements */
615	if (c == IN)
616		c = ESAC;
617	else if (c == '{')
618		c = '}';
619	else
620		syntaxerr(NULL);
621	t = tl = NULL;
622	/* no ALIAS here */
623	while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
624		struct op *tc = casepart(c);
625		if (tl == NULL)
626			t = tl = tc, tl->right = NULL;
627		else
628			tl->right = tc, tl = tc;
629	}
630	musthave(c, KEYWORD|sALIAS);
631	return (t);
632}
633
634static struct op *
635casepart(int endtok)
636{
637	struct op *t;
638	XPtrV ptns;
639
640	XPinit(ptns, 16);
641	t = newtp(TPAT);
642	/* no ALIAS here */
643	if (token(CONTIN | KEYWORD) != '(')
644		REJECT;
645	do {
646		switch (token(0)) {
647		case LWORD:
648			break;
649		case '}':
650		case ESAC:
651			if (symbol != endtok) {
652				strdupx(yylval.cp,
653				    symbol == '}' ? Tcbrace : Tesac, ATEMP);
654				break;
655			}
656			/* FALLTHROUGH */
657		default:
658			syntaxerr(NULL);
659		}
660		XPput(ptns, yylval.cp);
661	} while (token(0) == '|');
662	REJECT;
663	XPput(ptns, NULL);
664	t->vars = (char **)XPclose(ptns);
665	musthave(')', 0);
666
667	t->left = c_list(true);
668
669	/* initialise to default for ;; or omitted */
670	t->u.charflag = ';';
671	/* SUSv4 requires the ;; except in the last casepart */
672	if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
673		switch (symbol) {
674		default:
675			syntaxerr(NULL);
676		case BRKEV:
677			t->u.charflag = '|';
678			if (0)
679				/* FALLTHROUGH */
680		case BRKFT:
681			t->u.charflag = '&';
682			/* FALLTHROUGH */
683		case BREAK:
684			/* initialised above, but we need to eat the token */
685			ACCEPT;
686		}
687	return (t);
688}
689
690static struct op *
691function_body(char *name,
692    /* function foo { ... } vs foo() { .. } */
693    bool ksh_func)
694{
695	char *sname, *p;
696	struct op *t;
697
698	sname = wdstrip(name, 0);
699	/*-
700	 * Check for valid characters in name. POSIX and AT&T ksh93 say
701	 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
702	 * have allowed more; the following were never allowed:
703	 *	NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
704	 * C_QUOTE covers all but adds # * ? [ ]
705	 */
706	for (p = sname; *p; p++)
707		if (ctype(*p, C_QUOTE))
708			yyerror("%s: %s\n", sname, "invalid function name");
709
710	/*
711	 * Note that POSIX allows only compound statements after foo(),
712	 * sh and AT&T ksh allow any command, go with the later since it
713	 * shouldn't break anything. However, for function foo, AT&T ksh
714	 * only accepts an open-brace.
715	 */
716	if (ksh_func) {
717		if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
718			/* function foo () { //}*/
719			ACCEPT;
720			musthave(')', 0);
721			/* degrade to POSIX function */
722			ksh_func = false;
723		}
724		musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
725		REJECT;
726	}
727
728	t = newtp(TFUNCT);
729	t->str = sname;
730	t->u.ksh_func = tobool(ksh_func);
731	t->lineno = source->line;
732
733	if ((t->left = get_command(CONTIN)) == NULL) {
734		char *tv;
735		/*
736		 * Probably something like foo() followed by EOF or ';'.
737		 * This is accepted by sh and ksh88.
738		 * To make "typeset -f foo" work reliably (so its output can
739		 * be used as input), we pretend there is a colon here.
740		 */
741		t->left = newtp(TCOM);
742		/* (2 * sizeof(char *)) is small enough */
743		t->left->args = alloc(2 * sizeof(char *), ATEMP);
744		t->left->args[0] = tv = alloc(3, ATEMP);
745		tv[0] = QCHAR;
746		tv[1] = ':';
747		tv[2] = EOS;
748		t->left->args[1] = NULL;
749		t->left->vars = alloc(sizeof(char *), ATEMP);
750		t->left->vars[0] = NULL;
751		t->left->lineno = 1;
752	}
753
754	return (t);
755}
756
757static char **
758wordlist(void)
759{
760	int c;
761	XPtrV args;
762
763	XPinit(args, 16);
764	/* POSIX does not do alias expansion here... */
765	if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
766		if (c != ';')
767			/* non-POSIX, but AT&T ksh accepts a ; here */
768			REJECT;
769		return (NULL);
770	}
771	while ((c = token(0)) == LWORD)
772		XPput(args, yylval.cp);
773	if (c != '\n' && c != ';')
774		syntaxerr(NULL);
775	XPput(args, NULL);
776	return ((char **)XPclose(args));
777}
778
779/*
780 * supporting functions
781 */
782
783static struct op *
784block(int type, struct op *t1, struct op *t2)
785{
786	struct op *t;
787
788	t = newtp(type);
789	t->left = t1;
790	t->right = t2;
791	return (t);
792}
793
/*
 * Token spellings: initkeywords() enters the reserved ones into the
 * keyword table, syntaxerr() uses all of them to name a token.
 * The table ends with an entry whose name is NULL.
 */
static const struct tokeninfo {
	const char *name;	/* spelling */
	short val;		/* token value */
	short reserved;		/* true if this is a reserved word */
} tokentab[] = {
	/* Reserved words */
	{ "if",		IF,	true },
	{ "then",	THEN,	true },
	{ "else",	ELSE,	true },
	{ "elif",	ELIF,	true },
	{ "fi",		FI,	true },
	{ "case",	CASE,	true },
	{ Tesac,	ESAC,	true },
	{ "for",	FOR,	true },
	{ Tselect,	SELECT,	true },
	{ "while",	WHILE,	true },
	{ "until",	UNTIL,	true },
	{ "do",		DO,	true },
	{ "done",	DONE,	true },
	{ "in",		IN,	true },
	{ Tfunction,	FUNCTION, true },
	{ "time",	TIME,	true },
	{ "{",		'{',	true },
	{ Tcbrace,	'}',	true },
	{ "!",		BANG,	true },
	{ "[[",		DBRACKET, true },
	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
	{ "&&",		LOGAND,	false },
	{ "||",		LOGOR,	false },
	{ ";;",		BREAK,	false },
	{ ";|",		BRKEV,	false },
	{ ";&",		BRKFT,	false },
	{ "((",		MDPAREN, false },
	{ "|&",		COPROC,	false },
	/* and some special cases... */
	{ "newline",	'\n',	false },
	{ NULL,		0,	false }
};
832
833void
834initkeywords(void)
835{
836	struct tokeninfo const *tt;
837	struct tbl *p;
838
839	ktinit(APERM, &keywords,
840	    /* currently 28 keywords: 75% of 64 = 2^6 */
841	    6);
842	for (tt = tokentab; tt->name; tt++) {
843		if (tt->reserved) {
844			p = ktenter(&keywords, tt->name, hash(tt->name));
845			p->flag |= DEFINED|ISSET;
846			p->type = CKEYWD;
847			p->val.i = tt->val;
848		}
849	}
850}
851
852static void
853syntaxerr(const char *what)
854{
855	/* 23<<- is the longest redirection, I think */
856	char redir[8];
857	const char *s;
858	struct tokeninfo const *tt;
859	int c;
860
861	if (!what)
862		what = "unexpected";
863	REJECT;
864	c = token(0);
865 Again:
866	switch (c) {
867	case 0:
868		if (nesting.start_token) {
869			c = nesting.start_token;
870			source->errline = nesting.start_line;
871			what = "unmatched";
872			goto Again;
873		}
874		/* don't quote the EOF */
875		yyerror("%s: %s %s\n", Tsynerr, "unexpected", "EOF");
876		/* NOTREACHED */
877
878	case LWORD:
879		s = snptreef(NULL, 32, "%S", yylval.cp);
880		break;
881
882	case REDIR:
883		s = snptreef(redir, sizeof(redir), "%R", yylval.iop);
884		break;
885
886	default:
887		for (tt = tokentab; tt->name; tt++)
888			if (tt->val == c)
889			    break;
890		if (tt->name)
891			s = tt->name;
892		else {
893			if (c > 0 && c < 256) {
894				redir[0] = c;
895				redir[1] = '\0';
896			} else
897				shf_snprintf(redir, sizeof(redir),
898					"?%d", c);
899			s = redir;
900		}
901	}
902	yyerror("%s: '%s' %s\n", Tsynerr, s, what);
903}
904
905static void
906nesting_push(struct nesting_state *save, int tok)
907{
908	*save = nesting;
909	nesting.start_token = tok;
910	nesting.start_line = source->line;
911}
912
/* Restore the nesting state previously saved by nesting_push(). */
static void
nesting_pop(struct nesting_state *saved)
{
	nesting = *saved;
}
918
919static struct op *
920newtp(int type)
921{
922	struct op *t;
923
924	t = alloc(sizeof(struct op), ATEMP);
925	t->type = type;
926	t->u.evalflags = 0;
927	t->args = NULL;
928	t->vars = NULL;
929	t->ioact = NULL;
930	t->left = t->right = NULL;
931	t->str = NULL;
932	return (t);
933}
934
935struct op *
936compile(Source *s, bool skiputf8bom)
937{
938	nesting.start_token = 0;
939	nesting.start_line = 0;
940	herep = heres;
941	source = s;
942	if (skiputf8bom)
943		yyskiputf8bom();
944	yyparse();
945	return (outtree);
946}
947
948/*-
949 * This kludge exists to take care of sh/AT&T ksh oddity in which
950 * the arguments of alias/export/readonly/typeset have no field
951 * splitting, file globbing, or (normal) tilde expansion done.
952 * AT&T ksh seems to do something similar to this since
953 *	$ touch a=a; typeset a=[ab]; echo "$a"
954 *	a=[ab]
955 *	$ x=typeset; $x a=[ab]; echo "$a"
956 *	a=a
957 *	$
958 */
959int
960assign_command(const char *s, bool docommand)
961{
962	if (!*s)
963		return (0);
964	return ((strcmp(s, Talias) == 0) ||
965	    (strcmp(s, Texport) == 0) ||
966	    (strcmp(s, Treadonly) == 0) ||
967	    (docommand && (strcmp(s, Tcommand) == 0)) ||
968	    (strcmp(s, Ttypeset) == 0));
969}
970
971/* Check if we are in the middle of reading an alias */
972static int
973inalias(struct source *s)
974{
975	for (; s && s->type == SALIAS; s = s->next)
976		if (!(s->flags & SF_ALIASEND))
977			return (1);
978	return (0);
979}
980
981
/*
 * Order important - indexed by Test_meta values.
 * Note that ||, &&, ( and ) can't appear as unquoted strings
 * in normal shell input, so these can be interpreted unambiguously
 * in the evaluation pass.
 */
static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
static const char dbtest_not[] = { CHAR, '!', EOS };
static const char dbtest_oparen[] = { CHAR, '(', EOS };
static const char dbtest_cparen[] = { CHAR, ')', EOS };
const char * const dbtest_tokens[] = {
	dbtest_or, dbtest_and, dbtest_not,
	dbtest_oparen, dbtest_cparen
};
/* the closing ]] word, and the words stored for a bare < or > operator */
static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
static const char db_lthan[] = { CHAR, '<', EOS };
static const char db_gthan[] = { CHAR, '>', EOS };
1000
1001/*
1002 * Test if the current token is a whatever. Accepts the current token if
1003 * it is. Returns 0 if it is not, non-zero if it is (in the case of
1004 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
1005 */
1006static Test_op
1007dbtestp_isa(Test_env *te, Test_meta meta)
1008{
1009	int c = tpeek(CMDASN | (meta == TM_BINOP ? 0 : CONTIN));
1010	bool uqword;
1011	char *save = NULL;
1012	Test_op ret = TO_NONOP;
1013
1014	/* unquoted word? */
1015	uqword = c == LWORD && *ident;
1016
1017	if (meta == TM_OR)
1018		ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
1019	else if (meta == TM_AND)
1020		ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
1021	else if (meta == TM_NOT)
1022		ret = (uqword && !strcmp(yylval.cp,
1023		    dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
1024	else if (meta == TM_OPAREN)
1025		ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
1026	else if (meta == TM_CPAREN)
1027		ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
1028	else if (meta == TM_UNOP || meta == TM_BINOP) {
1029		if (meta == TM_BINOP && c == REDIR &&
1030		    (yylval.iop->ioflag == IOREAD ||
1031		    yylval.iop->ioflag == IOWRITE)) {
1032			ret = TO_NONNULL;
1033			save = wdcopy(yylval.iop->ioflag == IOREAD ?
1034			    db_lthan : db_gthan, ATEMP);
1035		} else if (uqword && (ret = test_isop(meta, ident)))
1036			save = yylval.cp;
1037	} else
1038		/* meta == TM_END */
1039		ret = (uqword && !strcmp(yylval.cp,
1040		    db_close)) ? TO_NONNULL : TO_NONOP;
1041	if (ret != TO_NONOP) {
1042		ACCEPT;
1043		if ((unsigned int)meta < NELEM(dbtest_tokens))
1044			save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
1045		if (save)
1046			XPput(*te->pos.av, save);
1047	}
1048	return (ret);
1049}
1050
1051static const char *
1052dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
1053    bool do_eval MKSH_A_UNUSED)
1054{
1055	int c = tpeek(CMDASN);
1056
1057	if (c != LWORD)
1058		return (NULL);
1059
1060	ACCEPT;
1061	XPput(*te->pos.av, yylval.cp);
1062
1063	return (null);
1064}
1065
/*
 * Evaluation callback for the parse pass: ignores all of its
 * arguments and always returns 1.
 */
static int
dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
    const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
    bool do_eval MKSH_A_UNUSED)
{
	return (1);
}
1073
/*
 * Error callback for [[ ... ]] parsing: flags the error in te and
 * raises a syntax error with msg. A negative offset selects an
 * already stored word, counted back from the end of the argument
 * vector, to be reported instead of the current token.
 */
static void
dbtestp_error(Test_env *te, int offset, const char *msg)
{
	te->flags |= TEF_ERROR;

	if (offset < 0) {
		/* make syntaxerr() see the stored word as the current token */
		REJECT;
		/* Kludgy to say the least... */
		symbol = LWORD;
		yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
		    offset);
	}
	syntaxerr(msg);
}
1088
1089#if HAVE_SELECT
1090
1091#ifndef EOVERFLOW
1092#ifdef ERANGE
1093#define EOVERFLOW	ERANGE
1094#else
1095#define EOVERFLOW	EINVAL
1096#endif
1097#endif
1098
/*
 * Parse a decimal number of seconds, with an optional fraction,
 * from s into *tv. At most six fractional digits are used, any
 * further digits are skipped. Returns false on success; on failure
 * returns true with errno set to EOVERFLOW (integral part too
 * large) or EINVAL (unexpected character).
 */
bool
parse_usec(const char *s, struct timeval *tv)
{
	struct timeval next;
	int scale;

	/* parse integral part */
	tv->tv_sec = 0;
	for (; ksh_isdigit(*s); ++s) {
		next.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s);
		/*XXX this overflow check maybe UB */
		if (next.tv_sec / 10 != tv->tv_sec) {
			errno = EOVERFLOW;
			return (true);
		}
		tv->tv_sec = next.tv_sec;
	}

	tv->tv_usec = 0;
	if (*s == '\0')
		/* no decimal fraction */
		return (false);
	if (*s != '.') {
		/* junk after integral part */
		errno = EINVAL;
		return (true);
	}
	++s;

	/* parse decimal fraction, one decimal place per digit */
	for (scale = 100000; ksh_isdigit(*s); scale /= 10) {
		tv->tv_usec += scale * ksh_numdig(*s);
		++s;
		if (scale == 1)
			break;
	}
	/* skip digits beyond microsecond precision */
	while (ksh_isdigit(*s))
		++s;

	/* end of input string reached, no errors */
	if (*s == '\0')
		return (false);

	/* junk after fractional part */
	errno = EINVAL;
	return (true);
}
1146#endif
1147
1148/*
1149 * Helper function called from within lex.c:yylex() to parse
1150 * a COMSUB recursively using the main shell parser and lexer
1151 */
1152char *
1153yyrecursive(int subtype MKSH_A_UNUSED)
1154{
1155	struct op *t;
1156	char *cp;
1157	struct yyrecursive_state *ys;
1158	int stok, etok;
1159
1160	if (subtype != COMSUB) {
1161		stok = '{';
1162		etok = '}';
1163	} else {
1164		stok = '(';
1165		etok = ')';
1166	}
1167
1168	ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
1169
1170	/* tell the lexer to accept a closing parenthesis as EOD */
1171	ys->old_nesting_type = subshell_nesting_type;
1172	subshell_nesting_type = etok;
1173
1174	/* push reject state, parse recursively, pop reject state */
1175	ys->old_reject = reject;
1176	ys->old_symbol = symbol;
1177	ACCEPT;
1178	ys->old_herep = herep;
1179	ys->old_salias = sALIAS;
1180	sALIAS = 0;
1181	ys->next = e->yyrecursive_statep;
1182	e->yyrecursive_statep = ys;
1183	/* we use TPAREN as a helper container here */
1184	t = nested(TPAREN, stok, etok);
1185	yyrecursive_pop(false);
1186
1187	/* t->left because nested(TPAREN, ...) hides our goodies there */
1188	cp = snptreef(NULL, 0, "%T", t->left);
1189	tfree(t, ATEMP);
1190
1191	return (cp);
1192}
1193
1194void
1195yyrecursive_pop(bool popall)
1196{
1197	struct yyrecursive_state *ys;
1198
1199 popnext:
1200	if (!(ys = e->yyrecursive_statep))
1201		return;
1202	e->yyrecursive_statep = ys->next;
1203
1204	sALIAS = ys->old_salias;
1205	herep = ys->old_herep;
1206	reject = ys->old_reject;
1207	symbol = ys->old_symbol;
1208
1209	subshell_nesting_type = ys->old_nesting_type;
1210
1211	afree(ys, ATEMP);
1212	if (popall)
1213		goto popnext;
1214}
1215