1/*	$OpenBSD: lex.c,v 1.46 2013/01/20 14:47:46 stsp Exp $	*/
2
3/*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 *		 2011, 2012, 2013
6 *	Thorsten Glaser <tg@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24#include "sh.h"
25
26__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.182 2013/02/19 18:45:20 tg Exp $");
27
/*
 * Lexer states used while a word is being collected. The values are
 * kept small because they are stored in the 8-bit "type" member of a
 * state stack entry.
 */
enum {
	SBASE = 0,		/* not inside any lexical construct */
	SWORD = 1,		/* implicit quoting for substitute() */
	SLETPAREN = 2,		/* within (( )), implicitly quoted */
	SSQUOTE = 3,		/* within '' */
	SDQUOTE = 4,		/* within "" */
	SEQUOTE = 5,		/* within $'' */
	SBRACE = 6,		/* within ${} */
	SQBRACE = 7,		/* within "${}" */
	SBQUOTE = 8,		/* within `` */
	SASPAREN = 9,		/* within $(( )) */
	SHEREDELIM = 10,	/* reading a <<,<<-,<<< delimiter */
	SHEREDQUOTE = 11,	/* reading " in a <<,<<-,<<< delimiter */
	SPATTERN = 12,		/* reading a *(...|...) pattern (*+?@!) */
	SADELIM = 13,		/* as SBASE, but watching for a delimiter */
	STBRACEKORN = 14,	/* reading ${...[#%]...} without FSH */
	STBRACEBOURNE = 15,	/* reading ${...[#%]...} with FSH */
	SINVALID = 255		/* not a valid state */
};
48
49struct sretrace_info {
50	struct sretrace_info *next;
51	XString xs;
52	char *xp;
53};
54
/*
 * One entry of the lexer state stack: the state type plus whatever
 * extra information that particular state requires.
 */
typedef struct lex_state {
	union {
		/* link to the next state block */
		struct lex_state *base;
		/* where the output of this state begins in the output string */
		int start;
		/* SBQUOTE: set when inside double quotes, i.e. "`...`" */
		/* SEQUOTE: a NUL was seen, rest of the string is dropped */
		bool abool;
		/* data for SADELIM */
		struct {
			/* the character being searched for */
			unsigned char delimiter;
			/* how many delimiters are accepted at most */
			unsigned char num;
		} adelim;
	} u;
	/* number of currently open parentheses */
	short nparen;
	/* which state this entry describes */
	uint8_t type;
} Lex_state;

/* shorthands for the union members */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85
86typedef struct {
87	Lex_state *base;
88	Lex_state *end;
89} State_info;
90
91static void readhere(struct ioword *);
92static void ungetsc(int);
93static void ungetsc_i(int);
94static int getsc_uu(void);
95static void getsc_line(Source *);
96static int getsc_bn(void);
97static int s_get(void);
98static void s_put(int);
99static char *get_brace_var(XString *, char *);
100static bool arraysub(char **);
101static void gethere(bool);
102static Lex_state *push_state_i(State_info *, Lex_state *);
103static Lex_state *pop_state_i(State_info *, Lex_state *);
104
105static int dopprompt(const char *, int, bool);
106
107static int backslash_skip;
108static int ignore_backslash_newline;
109
110/* optimised getsc_bn() */
111#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
112			    !backslash_skip ? *source->str++ : getsc_bn())
113/* optimised getsc_uu() */
114#define	o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
115
116/* retrace helper */
117#define o_getsc_r(carg)	{				\
118	int cev = (carg);				\
119	struct sretrace_info *rp = retrace_info;	\
120							\
121	while (rp) {					\
122		Xcheck(rp->xs, rp->xp);			\
123		*rp->xp++ = cev;			\
124		rp = rp->next;				\
125	}						\
126							\
127	return (cev);					\
128}
129
130#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
131static int getsc(void);
132
133static int
134getsc(void)
135{
136	o_getsc_r(o_getsc());
137}
138#else
139static int getsc_r(int);
140
141static int
142getsc_r(int c)
143{
144	o_getsc_r(c);
145}
146
147#define getsc()		getsc_r(o_getsc())
148#endif
149
/* number of Lex_state entries in one block of the state stack */
#define STATE_BSIZE	8

/*
 * enter state (s): advance to the next stack entry, letting
 * push_state_i() provide one when the current block is used up
 */
#define PUSH_STATE(s)	do {					\
	++statep;						\
	if (statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	statep->type = (s);					\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * leave the current state: step back one stack entry, letting
 * pop_state_i() pick the entry when the block base is reached
 */
#define POP_STATE()	do {					\
	--statep;						\
	if (statep == state_info.base)				\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/*
 * remember the current output position in the state entry and put
 * a fresh, empty retrace string at the head of the retrace list
 */
#define PUSH_SRETRACE()	do {					\
	struct sretrace_info *sr_new;				\
								\
	statep->ls_start = Xsavepos(ws, wp);			\
	sr_new = alloc(sizeof(*sr_new), ATEMP);			\
	Xinit(sr_new->xs, sr_new->xp, 64, ATEMP);		\
	sr_new->next = retrace_info;				\
	retrace_info = sr_new;					\
} while (/* CONSTCOND */ 0)

/*
 * rewind the output to the remembered position, NUL-terminate the
 * head retrace string and hand it out in sp, then unlink and free
 * the list node (dp is used as scratch and left pointing at it)
 */
#define POP_SRETRACE()	do {					\
	struct sretrace_info *sr_old = retrace_info;		\
								\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*sr_old->xp = '\0';					\
	sp = Xstring(sr_old->xs, sr_old->xp);			\
	dp = (void *)sr_old;					\
	retrace_info = sr_old->next;				\
	afree(dp, ATEMP);					\
} while (/* CONSTCOND */ 0)
182
183/**
184 * Lexical analyser
185 *
186 * tokens are not regular expressions, they are LL(1).
187 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188 * hence the state stack. Note "$(...)" are now parsed recursively.
189 */
190
191int
192yylex(int cf)
193{
194	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195	State_info state_info;
196	int c, c2, state;
197	size_t cz;
198	XString ws;		/* expandable output word */
199	char *wp;		/* output word pointer */
200	char *sp, *dp;
201
202 Again:
203	states[0].type = SINVALID;
204	states[0].ls_base = NULL;
205	statep = &states[1];
206	state_info.base = states;
207	state_info.end = &state_info.base[STATE_BSIZE];
208
209	Xinit(ws, wp, 64, ATEMP);
210
211	backslash_skip = 0;
212	ignore_backslash_newline = 0;
213
214	if (cf & ONEWORD)
215		state = SWORD;
216	else if (cf & LETEXPR) {
217		/* enclose arguments in (double) quotes */
218		*wp++ = OQUOTE;
219		state = SLETPAREN;
220		statep->nparen = 0;
221	} else {
222		/* normal lexing */
223		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224		while ((c = getsc()) == ' ' || c == '\t')
225			;
226		if (c == '#') {
227			ignore_backslash_newline++;
228			while ((c = getsc()) != '\0' && c != '\n')
229				;
230			ignore_backslash_newline--;
231		}
232		ungetsc(c);
233	}
234	if (source->flags & SF_ALIAS) {
235		/* trailing ' ' in alias definition */
236		source->flags &= ~SF_ALIAS;
237		cf |= ALIAS;
238	}
239
240	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241	statep->type = state;
242
243	/* check for here string */
244	if (state == SHEREDELIM) {
245		c = getsc();
246		if (c == '<') {
247			state = SHEREDELIM;
248			while ((c = getsc()) == ' ' || c == '\t')
249				;
250			ungetsc(c);
251			c = '<';
252			goto accept_nonword;
253		}
254		ungetsc(c);
255	}
256
257	/* collect non-special or quoted characters to form word */
258	while (!((c = getsc()) == 0 ||
259	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260		if (state == SBASE &&
261		    subshell_nesting_type == /*{*/ '}' &&
262		    c == /*{*/ '}')
263			/* possibly end ${ :;} */
264			break;
265 accept_nonword:
266		Xcheck(ws, wp);
267		switch (state) {
268		case SADELIM:
269			if (c == '(')
270				statep->nparen++;
271			else if (c == ')')
272				statep->nparen--;
273			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274			    c == (int)statep->ls_adelim.delimiter)) {
275				*wp++ = ADELIM;
276				*wp++ = c;
277				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278					POP_STATE();
279				if (c == /*{*/ '}')
280					POP_STATE();
281				break;
282			}
283			/* FALLTHROUGH */
284		case SBASE:
285			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286				/* temporary */
287				*wp = EOS;
288				if (is_wdvarname(Xstring(ws, wp), false)) {
289					char *p, *tmp;
290
291					if (arraysub(&tmp)) {
292						*wp++ = CHAR;
293						*wp++ = c;
294						for (p = tmp; *p; ) {
295							Xcheck(ws, wp);
296							*wp++ = CHAR;
297							*wp++ = *p++;
298						}
299						afree(tmp, ATEMP);
300						break;
301					} else {
302						Source *s;
303
304						s = pushs(SREREAD,
305						    source->areap);
306						s->start = s->str =
307						    s->u.freeme = tmp;
308						s->next = source;
309						source = s;
310					}
311				}
312				*wp++ = CHAR;
313				*wp++ = c;
314				break;
315			}
316			/* FALLTHROUGH */
317 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
318			if (c == '*' || c == '@' || c == '+' || c == '?' ||
319			    c == '!') {
320				c2 = getsc();
321				if (c2 == '(' /*)*/ ) {
322					*wp++ = OPAT;
323					*wp++ = c;
324					PUSH_STATE(SPATTERN);
325					break;
326				}
327				ungetsc(c2);
328			}
329			/* FALLTHROUGH */
330 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
331			switch (c) {
332			case '\\':
333 getsc_qchar:
334				if ((c = getsc())) {
335					/* trailing \ is lost */
336					*wp++ = QCHAR;
337					*wp++ = c;
338				}
339				break;
340			case '\'':
341 open_ssquote:
342				*wp++ = OQUOTE;
343				ignore_backslash_newline++;
344				PUSH_STATE(SSQUOTE);
345				break;
346			case '"':
347 open_sdquote:
348				*wp++ = OQUOTE;
349				PUSH_STATE(SDQUOTE);
350				break;
351			case '$':
352				/*
353				 * processing of dollar sign belongs into
354				 * Subst, except for those which can open
355				 * a string: $'…' and $"…"
356				 */
357 subst_dollar_ex:
358				c = getsc();
359				switch (c) {
360				case '"':
361					goto open_sdquote;
362				case '\'':
363					goto open_sequote;
364				default:
365					goto SubstS;
366				}
367			default:
368				goto Subst;
369			}
370			break;
371
372 Subst:
373			switch (c) {
374			case '\\':
375				c = getsc();
376				switch (c) {
377				case '"':
378					if ((cf & HEREDOC))
379						goto heredocquote;
380					/* FALLTHROUGH */
381				case '\\':
382				case '$': case '`':
383 store_qchar:
384					*wp++ = QCHAR;
385					*wp++ = c;
386					break;
387				default:
388 heredocquote:
389					Xcheck(ws, wp);
390					if (c) {
391						/* trailing \ is lost */
392						*wp++ = CHAR;
393						*wp++ = '\\';
394						*wp++ = CHAR;
395						*wp++ = c;
396					}
397					break;
398				}
399				break;
400			case '$':
401				c = getsc();
402 SubstS:
403				if (c == '(') /*)*/ {
404					c = getsc();
405					if (c == '(') /*)*/ {
406						*wp++ = EXPRSUB;
407						PUSH_STATE(SASPAREN);
408						statep->nparen = 2;
409						PUSH_SRETRACE();
410						*retrace_info->xp++ = '(';
411					} else {
412						ungetsc(c);
413 subst_command:
414						c = COMSUB;
415 subst_command2:
416						sp = yyrecursive(c);
417						cz = strlen(sp) + 1;
418						XcheckN(ws, wp, cz);
419						*wp++ = c;
420						memcpy(wp, sp, cz);
421						wp += cz;
422					}
423				} else if (c == '{') /*}*/ {
424					c = getsc();
425					if (ctype(c, C_IFSWS)) {
426						/*
427						 * non-subenvironment
428						 * "command" substitution
429						 */
430						c = FUNSUB;
431						goto subst_command2;
432					}
433					ungetsc(c);
434					*wp++ = OSUBST;
435					*wp++ = '{'; /*}*/
436					wp = get_brace_var(&ws, wp);
437					c = getsc();
438					/* allow :# and :% (ksh88 compat) */
439					if (c == ':') {
440						*wp++ = CHAR;
441						*wp++ = c;
442						c = getsc();
443						if (c == ':') {
444							*wp++ = CHAR;
445							*wp++ = '0';
446							*wp++ = ADELIM;
447							*wp++ = ':';
448							PUSH_STATE(SBRACE);
449							PUSH_STATE(SADELIM);
450							statep->ls_adelim.delimiter = ':';
451							statep->ls_adelim.num = 1;
452							statep->nparen = 0;
453							break;
454						} else if (ksh_isdigit(c) ||
455						    c == '('/*)*/ || c == ' ' ||
456						    /*XXX what else? */
457						    c == '$') {
458							/* substring subst. */
459							if (c != ' ') {
460								*wp++ = CHAR;
461								*wp++ = ' ';
462							}
463							ungetsc(c);
464							PUSH_STATE(SBRACE);
465							PUSH_STATE(SADELIM);
466							statep->ls_adelim.delimiter = ':';
467							statep->ls_adelim.num = 2;
468							statep->nparen = 0;
469							break;
470						}
471					} else if (c == '/') {
472						*wp++ = CHAR;
473						*wp++ = c;
474						if ((c = getsc()) == '/') {
475							*wp++ = ADELIM;
476							*wp++ = c;
477						} else
478							ungetsc(c);
479						PUSH_STATE(SBRACE);
480						PUSH_STATE(SADELIM);
481						statep->ls_adelim.delimiter = '/';
482						statep->ls_adelim.num = 1;
483						statep->nparen = 0;
484						break;
485					}
486					/*
487					 * If this is a trim operation,
488					 * treat (,|,) specially in STBRACE.
489					 */
490					if (ctype(c, C_SUBOP2)) {
491						ungetsc(c);
492						if (Flag(FSH))
493							PUSH_STATE(STBRACEBOURNE);
494						else
495							PUSH_STATE(STBRACEKORN);
496					} else {
497						ungetsc(c);
498						if (state == SDQUOTE)
499							PUSH_STATE(SQBRACE);
500						else
501							PUSH_STATE(SBRACE);
502					}
503				} else if (ksh_isalphx(c)) {
504					*wp++ = OSUBST;
505					*wp++ = 'X';
506					do {
507						Xcheck(ws, wp);
508						*wp++ = c;
509						c = getsc();
510					} while (ksh_isalnux(c));
511					*wp++ = '\0';
512					*wp++ = CSUBST;
513					*wp++ = 'X';
514					ungetsc(c);
515				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
516					Xcheck(ws, wp);
517					*wp++ = OSUBST;
518					*wp++ = 'X';
519					*wp++ = c;
520					*wp++ = '\0';
521					*wp++ = CSUBST;
522					*wp++ = 'X';
523				} else {
524					*wp++ = CHAR;
525					*wp++ = '$';
526					ungetsc(c);
527				}
528				break;
529			case '`':
530 subst_gravis:
531				PUSH_STATE(SBQUOTE);
532				*wp++ = COMSUB;
533				/*
534				 * Need to know if we are inside double quotes
535				 * since sh/AT&T-ksh translate the \" to " in
536				 * "`...\"...`".
537				 * This is not done in POSIX mode (section
538				 * 3.2.3, Double Quotes: "The backquote shall
539				 * retain its special meaning introducing the
540				 * other form of command substitution (see
541				 * 3.6.3). The portion of the quoted string
542				 * from the initial backquote and the
543				 * characters up to the next backquote that
544				 * is not preceded by a backslash (having
545				 * escape characters removed) defines that
546				 * command whose output replaces `...` when
547				 * the word is expanded."
548				 * Section 3.6.3, Command Substitution:
549				 * "Within the backquoted style of command
550				 * substitution, backslash shall retain its
551				 * literal meaning, except when followed by
552				 * $ ` \.").
553				 */
554				statep->ls_bool = false;
555				s2 = statep;
556				base = state_info.base;
557				while (/* CONSTCOND */ 1) {
558					for (; s2 != base; s2--) {
559						if (s2->type == SDQUOTE) {
560							statep->ls_bool = true;
561							break;
562						}
563					}
564					if (s2 != base)
565						break;
566					if (!(s2 = s2->ls_base))
567						break;
568					base = s2-- - STATE_BSIZE;
569				}
570				break;
571			case QCHAR:
572				if (cf & LQCHAR) {
573					*wp++ = QCHAR;
574					*wp++ = getsc();
575					break;
576				}
577				/* FALLTHROUGH */
578			default:
579 store_char:
580				*wp++ = CHAR;
581				*wp++ = c;
582			}
583			break;
584
585		case SEQUOTE:
586			if (c == '\'') {
587				POP_STATE();
588				*wp++ = CQUOTE;
589				ignore_backslash_newline--;
590			} else if (c == '\\') {
591				if ((c2 = unbksl(true, s_get, s_put)) == -1)
592					c2 = s_get();
593				if (c2 == 0)
594					statep->ls_bool = true;
595				if (!statep->ls_bool) {
596					char ts[4];
597
598					if ((unsigned int)c2 < 0x100) {
599						*wp++ = QCHAR;
600						*wp++ = c2;
601					} else {
602						cz = utf_wctomb(ts, c2 - 0x100);
603						ts[cz] = 0;
604						for (cz = 0; ts[cz]; ++cz) {
605							*wp++ = QCHAR;
606							*wp++ = ts[cz];
607						}
608					}
609				}
610			} else if (!statep->ls_bool) {
611				*wp++ = QCHAR;
612				*wp++ = c;
613			}
614			break;
615
616		case SSQUOTE:
617			if (c == '\'') {
618				POP_STATE();
619				*wp++ = CQUOTE;
620				ignore_backslash_newline--;
621			} else {
622				*wp++ = QCHAR;
623				*wp++ = c;
624			}
625			break;
626
627		case SDQUOTE:
628			if (c == '"') {
629				POP_STATE();
630				*wp++ = CQUOTE;
631			} else
632				goto Subst;
633			break;
634
635		/* $(( ... )) */
636		case SASPAREN:
637			if (c == '(')
638				statep->nparen++;
639			else if (c == ')') {
640				statep->nparen--;
641				if (statep->nparen == 1) {
642					/* end of EXPRSUB */
643					POP_SRETRACE();
644					POP_STATE();
645
646					if ((c2 = getsc()) == /*(*/ ')') {
647						cz = strlen(sp) - 2;
648						XcheckN(ws, wp, cz);
649						memcpy(wp, sp + 1, cz);
650						wp += cz;
651						afree(sp, ATEMP);
652						*wp++ = '\0';
653						break;
654					} else {
655						Source *s;
656
657						ungetsc(c2);
658						/*
659						 * mismatched parenthesis -
660						 * assume we were really
661						 * parsing a $(...) expression
662						 */
663						--wp;
664						s = pushs(SREREAD,
665						    source->areap);
666						s->start = s->str =
667						    s->u.freeme = sp;
668						s->next = source;
669						source = s;
670						goto subst_command;
671					}
672				}
673			}
674			/* reuse existing state machine */
675			goto Sbase2;
676
677		case SQBRACE:
678			if (c == '\\') {
679				/*
680				 * perform POSIX "quote removal" if the back-
681				 * slash is "special", i.e. same cases as the
682				 * {case '\\':} in Subst: plus closing brace;
683				 * in mksh code "quote removal" on '\c' means
684				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
685				 * emitted (in heredocquote:)
686				 */
687				if ((c = getsc()) == '"' || c == '\\' ||
688				    c == '$' || c == '`' || c == /*{*/'}')
689					goto store_qchar;
690				goto heredocquote;
691			}
692			goto common_SQBRACE;
693
694		case SBRACE:
695			if (c == '\'')
696				goto open_ssquote;
697			else if (c == '\\')
698				goto getsc_qchar;
699 common_SQBRACE:
700			if (c == '"')
701				goto open_sdquote;
702			else if (c == '$')
703				goto subst_dollar_ex;
704			else if (c == '`')
705				goto subst_gravis;
706			else if (c != /*{*/ '}')
707				goto store_char;
708			POP_STATE();
709			*wp++ = CSUBST;
710			*wp++ = /*{*/ '}';
711			break;
712
713		/* Same as SBASE, except (,|,) treated specially */
714		case STBRACEKORN:
715			if (c == '|')
716				*wp++ = SPAT;
717			else if (c == '(') {
718				*wp++ = OPAT;
719				/* simile for @ */
720				*wp++ = ' ';
721				PUSH_STATE(SPATTERN);
722			} else /* FALLTHROUGH */
723		case STBRACEBOURNE:
724			  if (c == /*{*/ '}') {
725				POP_STATE();
726				*wp++ = CSUBST;
727				*wp++ = /*{*/ '}';
728			} else
729				goto Sbase1;
730			break;
731
732		case SBQUOTE:
733			if (c == '`') {
734				*wp++ = 0;
735				POP_STATE();
736			} else if (c == '\\') {
737				switch (c = getsc()) {
738				case 0:
739					/* trailing \ is lost */
740					break;
741				case '\\':
742				case '$': case '`':
743					*wp++ = c;
744					break;
745				case '"':
746					if (statep->ls_bool) {
747						*wp++ = c;
748						break;
749					}
750					/* FALLTHROUGH */
751				default:
752					*wp++ = '\\';
753					*wp++ = c;
754					break;
755				}
756			} else
757				*wp++ = c;
758			break;
759
760		/* ONEWORD */
761		case SWORD:
762			goto Subst;
763
764		/* LETEXPR: (( ... )) */
765		case SLETPAREN:
766			if (c == /*(*/ ')') {
767				if (statep->nparen > 0)
768					--statep->nparen;
769				else if ((c2 = getsc()) == /*(*/ ')') {
770					c = 0;
771					*wp++ = CQUOTE;
772					goto Done;
773				} else {
774					Source *s;
775
776					ungetsc(c2);
777					/*
778					 * mismatched parenthesis -
779					 * assume we were really
780					 * parsing a (...) expression
781					 */
782					*wp = EOS;
783					sp = Xstring(ws, wp);
784					dp = wdstrip(sp, WDS_KEEPQ);
785					s = pushs(SREREAD, source->areap);
786					s->start = s->str = s->u.freeme = dp;
787					s->next = source;
788					source = s;
789					return ('('/*)*/);
790				}
791			} else if (c == '(')
792				/*
793				 * parentheses inside quotes and
794				 * backslashes are lost, but AT&T ksh
795				 * doesn't count them either
796				 */
797				++statep->nparen;
798			goto Sbase2;
799
800		/* <<, <<-, <<< delimiter */
801		case SHEREDELIM:
802			/*
803			 * here delimiters need a special case since
804			 * $ and `...` are not to be treated specially
805			 */
806			switch (c) {
807			case '\\':
808				if ((c = getsc())) {
809					/* trailing \ is lost */
810					*wp++ = QCHAR;
811					*wp++ = c;
812				}
813				break;
814			case '\'':
815				goto open_ssquote;
816			case '$':
817				if ((c2 = getsc()) == '\'') {
818 open_sequote:
819					*wp++ = OQUOTE;
820					ignore_backslash_newline++;
821					PUSH_STATE(SEQUOTE);
822					statep->ls_bool = false;
823					break;
824				} else if (c2 == '"') {
825					/* FALLTHROUGH */
826			case '"':
827					state = statep->type = SHEREDQUOTE;
828					PUSH_SRETRACE();
829					break;
830				}
831				ungetsc(c2);
832				/* FALLTHROUGH */
833			default:
834				*wp++ = CHAR;
835				*wp++ = c;
836			}
837			break;
838
839		/* " in <<, <<-, <<< delimiter */
840		case SHEREDQUOTE:
841			if (c != '"')
842				goto Subst;
843			POP_SRETRACE();
844			dp = strnul(sp) - 1;
845			/* remove the trailing double quote */
846			*dp = '\0';
847			/* store the quoted string */
848			*wp++ = OQUOTE;
849			XcheckN(ws, wp, (dp - sp));
850			dp = sp;
851			while ((c = *dp++)) {
852				if (c == '\\') {
853					switch ((c = *dp++)) {
854					case '\\':
855					case '"':
856					case '$':
857					case '`':
858						break;
859					default:
860						*wp++ = CHAR;
861						*wp++ = '\\';
862						break;
863					}
864				}
865				*wp++ = CHAR;
866				*wp++ = c;
867			}
868			afree(sp, ATEMP);
869			*wp++ = CQUOTE;
870			state = statep->type = SHEREDELIM;
871			break;
872
873		/* in *(...|...) pattern (*+?@!) */
874		case SPATTERN:
875			if (c == /*(*/ ')') {
876				*wp++ = CPAT;
877				POP_STATE();
878			} else if (c == '|') {
879				*wp++ = SPAT;
880			} else if (c == '(') {
881				*wp++ = OPAT;
882				/* simile for @ */
883				*wp++ = ' ';
884				PUSH_STATE(SPATTERN);
885			} else
886				goto Sbase1;
887			break;
888		}
889	}
890 Done:
891	Xcheck(ws, wp);
892	if (statep != &states[1])
893		/* XXX figure out what is missing */
894		yyerror("no closing quote\n");
895
896	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
897	if (state == SHEREDELIM)
898		state = SBASE;
899
900	dp = Xstring(ws, wp);
901	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
902		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
903
904		if (Xlength(ws, wp) == 0)
905			iop->unit = c == '<' ? 0 : 1;
906		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
907			if (dp[c2] != CHAR)
908				goto no_iop;
909			if (!ksh_isdigit(dp[c2 + 1]))
910				goto no_iop;
911			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
912		}
913
914		if (iop->unit >= FDBASE)
915			goto no_iop;
916
917		if (c == '&') {
918			if ((c2 = getsc()) != '>') {
919				ungetsc(c2);
920				goto no_iop;
921			}
922			c = c2;
923			iop->flag = IOBASH;
924		} else
925			iop->flag = 0;
926
927		c2 = getsc();
928		/* <<, >>, <> are ok, >< is not */
929		if (c == c2 || (c == '<' && c2 == '>')) {
930			iop->flag |= c == c2 ?
931			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
932			if (iop->flag == IOHERE) {
933				if ((c2 = getsc()) == '-') {
934					iop->flag |= IOSKIP;
935					c2 = getsc();
936				} else if (c2 == '<')
937					iop->flag |= IOHERESTR;
938				ungetsc(c2);
939				if (c2 == '\n')
940					iop->flag |= IONDELIM;
941			}
942		} else if (c2 == '&')
943			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
944		else {
945			iop->flag |= c == '>' ? IOWRITE : IOREAD;
946			if (c == '>' && c2 == '|')
947				iop->flag |= IOCLOB;
948			else
949				ungetsc(c2);
950		}
951
952		iop->name = NULL;
953		iop->delim = NULL;
954		iop->heredoc = NULL;
955		/* free word */
956		Xfree(ws, wp);
957		yylval.iop = iop;
958		return (REDIR);
959 no_iop:
960		afree(iop, ATEMP);
961	}
962
963	if (wp == dp && state == SBASE) {
964		/* free word */
965		Xfree(ws, wp);
966		/* no word, process LEX1 character */
967		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
968			if ((c2 = getsc()) == c)
969				c = (c == ';') ? BREAK :
970				    (c == '|') ? LOGOR :
971				    (c == '&') ? LOGAND :
972				    /* c == '(' ) */ MDPAREN;
973			else if (c == '|' && c2 == '&')
974				c = COPROC;
975			else if (c == ';' && c2 == '|')
976				c = BRKEV;
977			else if (c == ';' && c2 == '&')
978				c = BRKFT;
979			else
980				ungetsc(c2);
981#ifndef MKSH_SMALL
982			if (c == BREAK) {
983				if ((c2 = getsc()) == '&')
984					c = BRKEV;
985				else
986					ungetsc(c2);
987			}
988#endif
989		} else if (c == '\n') {
990			gethere(false);
991			if (cf & CONTIN)
992				goto Again;
993		} else if (c == '\0')
994			/* need here strings at EOF */
995			gethere(true);
996		return (c);
997	}
998
999	/* terminate word */
1000	*wp++ = EOS;
1001	yylval.cp = Xclose(ws, wp);
1002	if (state == SWORD || state == SLETPAREN
1003	    /* XXX ONEWORD? */)
1004		return (LWORD);
1005
1006	/* unget terminator */
1007	ungetsc(c);
1008
1009	/*
1010	 * note: the alias-vs-function code below depends on several
1011	 * interna: starting from here, source->str is not modified;
1012	 * the way getsc() and ungetsc() operate; etc.
1013	 */
1014
1015	/* copy word to unprefixed string ident */
1016	sp = yylval.cp;
1017	dp = ident;
1018	if ((cf & HEREDELIM) && (sp[1] == '<'))
1019		while ((dp - ident) < IDENT) {
1020			if ((c = *sp++) == CHAR)
1021				*dp++ = *sp++;
1022			else if ((c != OQUOTE) && (c != CQUOTE))
1023				break;
1024		}
1025	else
1026		while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1027			*dp++ = *sp++;
1028	/* Make sure the ident array stays '\0' padded */
1029	memset(dp, 0, (ident + IDENT) - dp + 1);
1030	if (c != EOS)
1031		/* word is not unquoted */
1032		*ident = '\0';
1033
1034	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1035		struct tbl *p;
1036		uint32_t h = hash(ident);
1037
1038		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1039		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1040		    p->val.i == /*{*/ '}')) {
1041			afree(yylval.cp, ATEMP);
1042			return (p->val.i);
1043		}
1044		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1045		    (p->flag & ISSET)) {
1046			/*
1047			 * this still points to the same character as the
1048			 * ungetsc'd terminator from above
1049			 */
1050			const char *cp = source->str;
1051
1052			/* prefer POSIX but not Korn functions over aliases */
1053			while (*cp == ' ' || *cp == '\t')
1054				/*
1055				 * this is like getsc() without skipping
1056				 * over Source boundaries (including not
1057				 * parsing ungetsc'd characters that got
1058				 * pushed into an SREREAD) which is what
1059				 * we want here anyway: find out whether
1060				 * the alias name is followed by a POSIX
1061				 * function definition (only the opening
1062				 * parenthesis is checked though)
1063				 */
1064				++cp;
1065			/* prefer functions over aliases */
1066			if (cp[0] != '(' || cp[1] != ')') {
1067				Source *s = source;
1068
1069				while (s && (s->flags & SF_HASALIAS))
1070					if (s->u.tblp == p)
1071						return (LWORD);
1072					else
1073						s = s->next;
1074				/* push alias expansion */
1075				s = pushs(SALIAS, source->areap);
1076				s->start = s->str = p->val.s;
1077				s->u.tblp = p;
1078				s->flags |= SF_HASALIAS;
1079				s->next = source;
1080				if (source->type == SEOF) {
1081					/* prevent infinite recursion at EOS */
1082					source->u.tblp = p;
1083					source->flags |= SF_HASALIAS;
1084				}
1085				source = s;
1086				afree(yylval.cp, ATEMP);
1087				goto Again;
1088			}
1089		}
1090	}
1091
1092	return (LWORD);
1093}
1094
1095static void
1096gethere(bool iseof)
1097{
1098	struct ioword **p;
1099
1100	for (p = heres; p < herep; p++)
1101		if (iseof && !((*p)->flag & IOHERESTR))
1102			/* only here strings at EOF */
1103			return;
1104		else
1105			readhere(*p);
1106	herep = heres;
1107}
1108
1109/*
1110 * read "<<word" text into temp file
1111 */
1112
1113static void
1114readhere(struct ioword *iop)
1115{
1116	int c;
1117	const char *eof, *eofp;
1118	XString xs;
1119	char *xp;
1120	int xpos;
1121
1122	if (iop->flag & IOHERESTR) {
1123		/* process the here string */
1124		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1125		xpos = strlen(xp) - 1;
1126		memmove(xp, xp + 1, xpos);
1127		xp[xpos] = '\n';
1128		return;
1129	}
1130
1131	eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1132
1133	if (!(iop->flag & IOEVAL))
1134		ignore_backslash_newline++;
1135
1136	Xinit(xs, xp, 256, ATEMP);
1137
1138 heredoc_read_line:
1139	/* beginning of line */
1140	eofp = eof;
1141	xpos = Xsavepos(xs, xp);
1142	if (iop->flag & IOSKIP) {
1143		/* skip over leading tabs */
1144		while ((c = getsc()) == '\t')
1145			/* nothing */;
1146		goto heredoc_parse_char;
1147	}
1148 heredoc_read_char:
1149	c = getsc();
1150 heredoc_parse_char:
1151	/* compare with here document marker */
1152	if (!*eofp) {
1153		/* end of here document marker, what to do? */
1154		switch (c) {
1155		case /*(*/ ')':
1156			if (!subshell_nesting_type)
1157				/*-
1158				 * not allowed outside $(...) or (...)
1159				 * => mismatch
1160				 */
1161				break;
1162			/* allow $(...) or (...) to close here */
1163			ungetsc(/*(*/ ')');
1164			/* FALLTHROUGH */
1165		case 0:
1166			/*
1167			 * Allow EOF here to commands without trailing
1168			 * newlines (mksh -c '...') will work as well.
1169			 */
1170		case '\n':
1171			/* Newline terminates here document marker */
1172			goto heredoc_found_terminator;
1173		}
1174	} else if (c == *eofp++)
1175		/* store; then read and compare next character */
1176		goto heredoc_store_and_loop;
1177	/* nope, mismatch; read until end of line */
1178	while (c != '\n') {
1179		if (!c)
1180			/* oops, reached EOF */
1181			yyerror("%s '%s' unclosed\n", "here document", eof);
1182		/* store character */
1183		Xcheck(xs, xp);
1184		Xput(xs, xp, c);
1185		/* read next character */
1186		c = getsc();
1187	}
1188	/* we read a newline as last character */
1189 heredoc_store_and_loop:
1190	/* store character */
1191	Xcheck(xs, xp);
1192	Xput(xs, xp, c);
1193	if (c == '\n')
1194		goto heredoc_read_line;
1195	goto heredoc_read_char;
1196
1197 heredoc_found_terminator:
1198	/* jump back to saved beginning of line */
1199	xp = Xrestpos(xs, xp, xpos);
1200	/* terminate, close and store */
1201	Xput(xs, xp, '\0');
1202	iop->heredoc = Xclose(xs, xp);
1203
1204	if (!(iop->flag & IOEVAL))
1205		ignore_backslash_newline--;
1206}
1207
1208void
1209yyerror(const char *fmt, ...)
1210{
1211	va_list va;
1212
1213	/* pop aliases and re-reads */
1214	while (source->type == SALIAS || source->type == SREREAD)
1215		source = source->next;
1216	/* zap pending input */
1217	source->str = null;
1218
1219	error_prefix(true);
1220	va_start(va, fmt);
1221	shf_vfprintf(shl_out, fmt, va);
1222	va_end(va);
1223	errorfz();
1224}
1225
1226/*
1227 * input for yylex with alias expansion
1228 */
1229
1230Source *
1231pushs(int type, Area *areap)
1232{
1233	Source *s;
1234
1235	s = alloc(sizeof(Source), areap);
1236	memset(s, 0, sizeof(Source));
1237	s->type = type;
1238	s->str = null;
1239	s->areap = areap;
1240	if (type == SFILE || type == SSTDIN)
1241		XinitN(s->xs, 256, s->areap);
1242	return (s);
1243}
1244
1245static int
1246getsc_uu(void)
1247{
1248	Source *s = source;
1249	int c;
1250
1251	while ((c = *s->str++) == 0) {
1252		/* return 0 for EOF by default */
1253		s->str = NULL;
1254		switch (s->type) {
1255		case SEOF:
1256			s->str = null;
1257			return (0);
1258
1259		case SSTDIN:
1260		case SFILE:
1261			getsc_line(s);
1262			break;
1263
1264		case SWSTR:
1265			break;
1266
1267		case SSTRING:
1268		case SSTRINGCMDLINE:
1269			break;
1270
1271		case SWORDS:
1272			s->start = s->str = *s->u.strv++;
1273			s->type = SWORDSEP;
1274			break;
1275
1276		case SWORDSEP:
1277			if (*s->u.strv == NULL) {
1278				s->start = s->str = "\n";
1279				s->type = SEOF;
1280			} else {
1281				s->start = s->str = " ";
1282				s->type = SWORDS;
1283			}
1284			break;
1285
1286		case SALIAS:
1287			if (s->flags & SF_ALIASEND) {
1288				/* pass on an unused SF_ALIAS flag */
1289				source = s->next;
1290				source->flags |= s->flags & SF_ALIAS;
1291				s = source;
1292			} else if (*s->u.tblp->val.s &&
1293			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1294				/* pop source stack */
1295				source = s = s->next;
1296				/*
1297				 * Note that this alias ended with a
1298				 * space, enabling alias expansion on
1299				 * the following word.
1300				 */
1301				s->flags |= SF_ALIAS;
1302			} else {
1303				/*
1304				 * At this point, we need to keep the current
1305				 * alias in the source list so recursive
1306				 * aliases can be detected and we also need to
1307				 * return the next character. Do this by
1308				 * temporarily popping the alias to get the
1309				 * next character and then put it back in the
1310				 * source list with the SF_ALIASEND flag set.
1311				 */
1312				/* pop source stack */
1313				source = s->next;
1314				source->flags |= s->flags & SF_ALIAS;
1315				c = getsc_uu();
1316				if (c) {
1317					s->flags |= SF_ALIASEND;
1318					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1319					s->start = s->str = s->ugbuf;
1320					s->next = source;
1321					source = s;
1322				} else {
1323					s = source;
1324					/* avoid reading EOF twice */
1325					s->str = NULL;
1326					break;
1327				}
1328			}
1329			continue;
1330
1331		case SREREAD:
1332			if (s->start != s->ugbuf)
1333				/* yuck */
1334				afree(s->u.freeme, ATEMP);
1335			source = s = s->next;
1336			continue;
1337		}
1338		if (s->str == NULL) {
1339			s->type = SEOF;
1340			s->start = s->str = null;
1341			return ('\0');
1342		}
1343		if (s->flags & SF_ECHO) {
1344			shf_puts(s->str, shl_out);
1345			shf_flush(shl_out);
1346		}
1347	}
1348	return (c);
1349}
1350
1351static void
1352getsc_line(Source *s)
1353{
1354	char *xp = Xstring(s->xs, xp), *cp;
1355	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1356	bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1357
1358	/* Done here to ensure nothing odd happens when a timeout occurs */
1359	XcheckN(s->xs, xp, LINE);
1360	*xp = '\0';
1361	s->start = s->str = xp;
1362
1363	if (have_tty && ksh_tmout) {
1364		ksh_tmout_state = TMOUT_READING;
1365		alarm(ksh_tmout);
1366	}
1367	if (interactive)
1368		change_winsz();
1369#ifndef MKSH_NO_CMDLINE_EDITING
1370	if (have_tty && (
1371#if !MKSH_S_NOVI
1372	    Flag(FVI) ||
1373#endif
1374	    Flag(FEMACS) || Flag(FGMACS))) {
1375		int nread;
1376
1377		nread = x_read(xp, LINE);
1378		if (nread < 0)
1379			/* read error */
1380			nread = 0;
1381		xp[nread] = '\0';
1382		xp += nread;
1383	} else
1384#endif
1385	  {
1386		if (interactive)
1387			pprompt(prompt, 0);
1388		else
1389			s->line++;
1390
1391		while (/* CONSTCOND */ 1) {
1392			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1393
1394			if (!p && shf_error(s->u.shf) &&
1395			    shf_errno(s->u.shf) == EINTR) {
1396				shf_clearerr(s->u.shf);
1397				if (trap)
1398					runtraps(0);
1399				continue;
1400			}
1401			if (!p || (xp = p, xp[-1] == '\n'))
1402				break;
1403			/* double buffer size */
1404			/* move past NUL so doubling works... */
1405			xp++;
1406			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1407			/* ...and move back again */
1408			xp--;
1409		}
1410		/*
1411		 * flush any unwanted input so other programs/builtins
1412		 * can read it. Not very optimal, but less error prone
1413		 * than flushing else where, dealing with redirections,
1414		 * etc.
1415		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1416		 */
1417		if (s->type == SSTDIN)
1418			shf_flush(s->u.shf);
1419	}
1420	/*
1421	 * XXX: temporary kludge to restore source after a
1422	 * trap may have been executed.
1423	 */
1424	source = s;
1425	if (have_tty && ksh_tmout) {
1426		ksh_tmout_state = TMOUT_EXECUTING;
1427		alarm(0);
1428	}
1429	cp = Xstring(s->xs, xp);
1430	s->start = s->str = cp;
1431	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1432	/* Note: if input is all nulls, this is not eof */
1433	if (Xlength(s->xs, xp) == 0) {
1434		/* EOF */
1435		if (s->type == SFILE)
1436			shf_fdclose(s->u.shf);
1437		s->str = NULL;
1438	} else if (interactive && *s->str &&
1439	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1440		histsave(&s->line, s->str, true, true);
1441#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1442	} else if (interactive && cur_prompt == PS1) {
1443		cp = Xstring(s->xs, xp);
1444		while (*cp && ctype(*cp, C_IFSWS))
1445			++cp;
1446		if (!*cp)
1447			histsync();
1448#endif
1449	}
1450	if (interactive)
1451		set_prompt(PS2, NULL);
1452}
1453
1454void
1455set_prompt(int to, Source *s)
1456{
1457	cur_prompt = to;
1458
1459	switch (to) {
1460	/* command */
1461	case PS1:
1462		/*
1463		 * Substitute ! and !! here, before substitutions are done
1464		 * so ! in expanded variables are not expanded.
1465		 * NOTE: this is not what AT&T ksh does (it does it after
1466		 * substitutions, POSIX doesn't say which is to be done.
1467		 */
1468		{
1469			struct shf *shf;
1470			char * volatile ps1;
1471			Area *saved_atemp;
1472
1473			ps1 = str_val(global("PS1"));
1474			shf = shf_sopen(NULL, strlen(ps1) * 2,
1475			    SHF_WR | SHF_DYNAMIC, NULL);
1476			while (*ps1)
1477				if (*ps1 != '!' || *++ps1 == '!')
1478					shf_putchar(*ps1++, shf);
1479				else
1480					shf_fprintf(shf, "%d",
1481						s ? s->line + 1 : 0);
1482			ps1 = shf_sclose(shf);
1483			saved_atemp = ATEMP;
1484			newenv(E_ERRH);
1485			if (kshsetjmp(e->jbuf)) {
1486				prompt = safe_prompt;
1487				/*
1488				 * Don't print an error - assume it has already
1489				 * been printed. Reason is we may have forked
1490				 * to run a command and the child may be
1491				 * unwinding its stack through this code as it
1492				 * exits.
1493				 */
1494			} else {
1495				char *cp = substitute(ps1, 0);
1496				strdupx(prompt, cp, saved_atemp);
1497			}
1498			quitenv(NULL);
1499		}
1500		break;
1501	/* command continuation */
1502	case PS2:
1503		prompt = str_val(global("PS2"));
1504		break;
1505	}
1506}
1507
1508static int
1509dopprompt(const char *cp, int ntruncate, bool doprint)
1510{
1511	int columns = 0, lines = 0;
1512	bool indelimit = false;
1513	char delimiter = 0;
1514
1515	/*
1516	 * Undocumented AT&T ksh feature:
1517	 * If the second char in the prompt string is \r then the first
1518	 * char is taken to be a non-printing delimiter and any chars
1519	 * between two instances of the delimiter are not considered to
1520	 * be part of the prompt length
1521	 */
1522	if (*cp && cp[1] == '\r') {
1523		delimiter = *cp;
1524		cp += 2;
1525	}
1526	for (; *cp; cp++) {
1527		if (indelimit && *cp != delimiter)
1528			;
1529		else if (*cp == '\n' || *cp == '\r') {
1530			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1531			columns = 0;
1532		} else if (*cp == '\t') {
1533			columns = (columns | 7) + 1;
1534		} else if (*cp == '\b') {
1535			if (columns > 0)
1536				columns--;
1537		} else if (*cp == delimiter)
1538			indelimit = !indelimit;
1539		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1540			const char *cp2;
1541			columns += utf_widthadj(cp, &cp2);
1542			if (doprint && (indelimit ||
1543			    (ntruncate < (x_cols * lines + columns))))
1544				shf_write(cp, cp2 - cp, shl_out);
1545			cp = cp2 - /* loop increment */ 1;
1546			continue;
1547		} else
1548			columns++;
1549		if (doprint && (*cp != delimiter) &&
1550		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1551			shf_putc(*cp, shl_out);
1552	}
1553	if (doprint)
1554		shf_flush(shl_out);
1555	return (x_cols * lines + columns);
1556}
1557
1558
1559void
1560pprompt(const char *cp, int ntruncate)
1561{
1562	dopprompt(cp, ntruncate, true);
1563}
1564
1565int
1566promptlen(const char *cp)
1567{
1568	return (dopprompt(cp, 0, false));
1569}
1570
1571/*
1572 * Read the variable part of a ${...} expression (i.e. up to but not
1573 * including the :[-+?=#%] or close-brace).
1574 */
1575static char *
1576get_brace_var(XString *wsp, char *wp)
1577{
1578	char c;
1579	enum parse_state {
1580		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1581		PS_NUMBER, PS_VAR1
1582	} state = PS_INITIAL;
1583
1584	while (/* CONSTCOND */ 1) {
1585		c = getsc();
1586		/* State machine to figure out where the variable part ends. */
1587		switch (state) {
1588		case PS_INITIAL:
1589			if (c == '#' || c == '!' || c == '%') {
1590				state = PS_SAW_HASH;
1591				break;
1592			}
1593			/* FALLTHROUGH */
1594		case PS_SAW_HASH:
1595			if (ksh_isalphx(c))
1596				state = PS_IDENT;
1597			else if (ksh_isdigit(c))
1598				state = PS_NUMBER;
1599			else if (c == '#') {
1600				if (state == PS_SAW_HASH) {
1601					char c2;
1602
1603					c2 = getsc();
1604					ungetsc(c2);
1605					if (c2 != /*{*/ '}') {
1606						ungetsc(c);
1607						goto out;
1608					}
1609				}
1610				state = PS_VAR1;
1611			} else if (ctype(c, C_VAR1))
1612				state = PS_VAR1;
1613			else
1614				goto out;
1615			break;
1616		case PS_IDENT:
1617			if (!ksh_isalnux(c)) {
1618				if (c == '[') {
1619					char *tmp, *p;
1620
1621					if (!arraysub(&tmp))
1622						yyerror("missing ]\n");
1623					*wp++ = c;
1624					for (p = tmp; *p; ) {
1625						Xcheck(*wsp, wp);
1626						*wp++ = *p++;
1627					}
1628					afree(tmp, ATEMP);
1629					/* the ] */
1630					c = getsc();
1631				}
1632				goto out;
1633			}
1634			break;
1635		case PS_NUMBER:
1636			if (!ksh_isdigit(c))
1637				goto out;
1638			break;
1639		case PS_VAR1:
1640			goto out;
1641		}
1642		Xcheck(*wsp, wp);
1643		*wp++ = c;
1644	}
1645 out:
1646	/* end of variable part */
1647	*wp++ = '\0';
1648	ungetsc(c);
1649	return (wp);
1650}
1651
1652/*
1653 * Save an array subscript - returns true if matching bracket found, false
1654 * if eof or newline was found.
1655 * (Returned string double null terminated)
1656 */
1657static bool
1658arraysub(char **strp)
1659{
1660	XString ws;
1661	char *wp, c;
1662	/* we are just past the initial [ */
1663	unsigned int depth = 1;
1664
1665	Xinit(ws, wp, 32, ATEMP);
1666
1667	do {
1668		c = getsc();
1669		Xcheck(ws, wp);
1670		*wp++ = c;
1671		if (c == '[')
1672			depth++;
1673		else if (c == ']')
1674			depth--;
1675	} while (depth > 0 && c && c != '\n');
1676
1677	*wp++ = '\0';
1678	*strp = Xclose(ws, wp);
1679
1680	return (tobool(depth == 0));
1681}
1682
1683/* Unget a char: handles case when we are already at the start of the buffer */
1684static void
1685ungetsc(int c)
1686{
1687	struct sretrace_info *rp = retrace_info;
1688
1689	if (backslash_skip)
1690		backslash_skip--;
1691	/* Don't unget EOF... */
1692	if (source->str == null && c == '\0')
1693		return;
1694	while (rp) {
1695		if (Xlength(rp->xs, rp->xp))
1696			rp->xp--;
1697		rp = rp->next;
1698	}
1699	ungetsc_i(c);
1700}
1701static void
1702ungetsc_i(int c)
1703{
1704	if (source->str > source->start)
1705		source->str--;
1706	else {
1707		Source *s;
1708
1709		s = pushs(SREREAD, source->areap);
1710		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711		s->start = s->str = s->ugbuf;
1712		s->next = source;
1713		source = s;
1714	}
1715}
1716
1717
1718/* Called to get a char that isn't a \newline sequence. */
1719static int
1720getsc_bn(void)
1721{
1722	int c, c2;
1723
1724	if (ignore_backslash_newline)
1725		return (o_getsc_u());
1726
1727	if (backslash_skip == 1) {
1728		backslash_skip = 2;
1729		return (o_getsc_u());
1730	}
1731
1732	backslash_skip = 0;
1733
1734	while (/* CONSTCOND */ 1) {
1735		c = o_getsc_u();
1736		if (c == '\\') {
1737			if ((c2 = o_getsc_u()) == '\n')
1738				/* ignore the \newline; get the next char... */
1739				continue;
1740			ungetsc_i(c2);
1741			backslash_skip = 1;
1742		}
1743		return (c);
1744	}
1745}
1746
1747void
1748yyskiputf8bom(void)
1749{
1750	int c;
1751
1752	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1753		ungetsc_i(c);
1754		return;
1755	}
1756	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1757		ungetsc_i(c);
1758		ungetsc_i(0xEF);
1759		return;
1760	}
1761	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1762		ungetsc_i(c);
1763		ungetsc_i(0xBB);
1764		ungetsc_i(0xEF);
1765		return;
1766	}
1767	UTFMODE |= 8;
1768}
1769
1770static Lex_state *
1771push_state_i(State_info *si, Lex_state *old_end)
1772{
1773	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1774
1775	news[0].ls_base = old_end;
1776	si->base = &news[0];
1777	si->end = &news[STATE_BSIZE];
1778	return (&news[1]);
1779}
1780
1781static Lex_state *
1782pop_state_i(State_info *si, Lex_state *old_end)
1783{
1784	Lex_state *old_base = si->base;
1785
1786	si->base = old_end->ls_base - STATE_BSIZE;
1787	si->end = old_end->ls_base;
1788
1789	afree(old_base, ATEMP);
1790
1791	return (si->base + STATE_BSIZE - 1);
1792}
1793
/* Function wrapper around getsc(): returns the next input character. */
static int
s_get(void)
{
	int c;

	c = getsc();
	return (c);
}
1799
/* Function wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
	return;
}
1805