1/*	$OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $	*/
2
3/*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 *	Thorsten Glaser <tg@mirbsd.org>
6 *
7 * Provided that these terms and disclaimer and all copyright notices
8 * are retained or reproduced in an accompanying document, permission
9 * is granted to deal in this work without restriction, including un-
10 * limited rights to use, publicly perform, distribute, sell, modify,
11 * merge, give away, or sublicence.
12 *
13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
14 * the utmost extent permitted by applicable law, neither express nor
15 * implied; without malicious intent or gross negligence. In no event
16 * may a licensor, author or contributor be held liable for indirect,
17 * direct, other damage, loss, or other issues arising in any way out
18 * of dealing in the work, even if advised of the possibility of such
19 * damage or existence of a defect, except proven that it results out
20 * of said person's immediate fault when using the work as intended.
21 */
22
23#include "sh.h"
24
25__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $");
26
/*
 * Lexer states. While a word is being scanned, the "type" member of
 * the current entry on the state stack holds one of these values.
 */
#define SBASE		0	/* not inside any lexical construct */
#define SWORD		1	/* ONEWORD: implicit quoting for substitute() */
#define SLETPAREN	2	/* within (( )), implicitly quoted */
#define SSQUOTE		3	/* within '...' */
#define SDQUOTE		4	/* within "..." */
#define SEQUOTE		5	/* within $'...' */
#define SBRACE		6	/* within ${...} */
#define SQBRACE		7	/* within "${...}" */
#define SBQUOTE		8	/* within `...` */
#define SASPAREN	9	/* within $(( ... )) */
#define SHEREDELIM	10	/* reading the delimiter of <<, <<- */
#define SHEREDQUOTE	11	/* reading "..." in a <<, <<- delimiter */
#define SPATTERN	12	/* reading a *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* same as SBASE, but watching for a delimiter */
#define SHERESTRING	14	/* reading the string of <<< */
#define STBRACEKORN	15	/* reading ${...[#%]...} without FSH */
#define STBRACEBOURNE	16	/* reading ${...[#%]...} with FSH */
#define SINVALID	255	/* sentinel: not a valid state */
48
49struct sretrace_info {
50	struct sretrace_info *next;
51	XString xs;
52	char *xp;
53};
54
/*
 * One entry of the lexer state stack: the state itself plus the
 * per-state bookkeeping that goes with it.
 */
typedef struct lex_state {
	/* which member is meaningful depends on the state (and slot) */
	union {
		/* link to the next block of state entries */
		struct lex_state *base;
		/* offset in the output string where this state's output begins */
		int start;
		/*
		 * SBQUOTE: set if the backquotes sit inside double quotes,
		 * as in "`...`"
		 * SEQUOTE: set once a NUL was seen; rest of string is dropped
		 */
		bool abool;
		/* SADELIM bookkeeping */
		struct {
			/* the character being looked for */
			unsigned char delimiter;
			/* how many delimiters may still be matched */
			unsigned char num;
		} adelim;
	} u;
	/* number of currently open parentheses */
	short nparen;
	/* the state (one of the S* constants) */
	uint8_t type;
} Lex_state;
/* shorthands for the union members */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim
85
86typedef struct {
87	Lex_state *base;
88	Lex_state *end;
89} State_info;
90
91static void readhere(struct ioword *);
92static void ungetsc(int);
93static void ungetsc_(int);
94static int getsc_uu(void);
95static void getsc_line(Source *);
96static int getsc_bn(void);
97static int s_get(void);
98static void s_put(int);
99static char *get_brace_var(XString *, char *);
100static bool arraysub(char **);
101static void gethere(bool);
102static Lex_state *push_state_(State_info *, Lex_state *);
103static Lex_state *pop_state_(State_info *, Lex_state *);
104
105static int dopprompt(const char *, int, bool);
106void yyskiputf8bom(void);
107
108static int backslash_skip;
109static int ignore_backslash_newline;
110static struct sretrace_info *retrace_info;
111short subshell_nesting_level = 0;
112
/*
 * fast path for getsc_bn(): hand out the next buffered character
 * directly unless it is NUL or a backslash or backslash_skip is set
 */
#define o_getsc()	((*source->str == '\0' || *source->str == '\\' || \
			    backslash_skip) ? getsc_bn() : *source->str++)
/* fast path for getsc_uu(): only call it when the buffer is used up */
#define	o_getsc_u()	((*source->str == '\0') ? getsc_uu() : *source->str++)

/*
 * retrace helper; expands to a complete function body which appends
 * the character carg to every active retrace record and returns it
 */
#define o_getsc_r(carg)	{					\
	struct sretrace_info *ri;				\
	int ch = (carg);					\
								\
	for (ri = retrace_info; ri != NULL; ri = ri->next) {	\
		Xcheck(ri->xs, ri->xp);				\
		*ri->xp++ = ch;					\
	}							\
								\
	return (ch);						\
}
132
#ifndef MKSH_SMALL
static int getsc_r(int);

/* record the already fetched character c in all retrace buffers */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

/* the fast path of o_getsc() is expanded inline at every call site */
#define getsc()		getsc_r(o_getsc())
#else
static int getsc(void);

/* small build: fetching and retracing share one real function */
static int
getsc(void)
{
	o_getsc_r(o_getsc());
}
#endif
152
/* number of entries in one block of the state stack */
#define STATE_BSIZE	8

/*
 * The macros below work on the local variables of yylex(): statep,
 * state, state_info, ws, wp, sp and dp.
 */

/* enter state s, moving on to another block when this one is full */
#define PUSH_STATE(s)	do {					\
	++statep;						\
	if (statep == state_info.end)				\
		statep = push_state_(&state_info, statep);	\
	statep->type = (s);					\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/* leave the current state, stepping back over a block boundary */
#define POP_STATE()	do {					\
	--statep;						\
	if (statep == state_info.base)				\
		statep = pop_state_(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)

/* remember the output position and start recording the input read */
#define PUSH_SRETRACE()	do {					\
	struct sretrace_info *rtrace;				\
								\
	statep->ls_start = Xsavepos(ws, wp);			\
	rtrace = alloc(sizeof(*rtrace), ATEMP);			\
	Xinit(rtrace->xs, rtrace->xp, 64, ATEMP);		\
	rtrace->next = retrace_info;				\
	retrace_info = rtrace;					\
} while (/* CONSTCOND */ 0)

/*
 * stop recording: rewind wp to the remembered output position, leave
 * the NUL-terminated recording in sp and release the record itself
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
} while (/* CONSTCOND */ 0)
185
186/**
187 * Lexical analyser
188 *
189 * tokens are not regular expressions, they are LL(1).
190 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
191 * hence the state stack. Note "$(...)" are now parsed recursively.
192 */
193
194int
195yylex(int cf)
196{
197	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
198	State_info state_info;
199	int c, c2, state;
200	size_t cz;
201	XString ws;		/* expandable output word */
202	char *wp;		/* output word pointer */
203	char *sp, *dp;
204
205 Again:
206	states[0].type = SINVALID;
207	states[0].ls_base = NULL;
208	statep = &states[1];
209	state_info.base = states;
210	state_info.end = &state_info.base[STATE_BSIZE];
211
212	Xinit(ws, wp, 64, ATEMP);
213
214	backslash_skip = 0;
215	ignore_backslash_newline = 0;
216
217	if (cf & ONEWORD)
218		state = SWORD;
219	else if (cf & LETEXPR) {
220		/* enclose arguments in (double) quotes */
221		*wp++ = OQUOTE;
222		state = SLETPAREN;
223		statep->nparen = 0;
224	} else {
225		/* normal lexing */
226		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
227		while ((c = getsc()) == ' ' || c == '\t')
228			;
229		if (c == '#') {
230			ignore_backslash_newline++;
231			while ((c = getsc()) != '\0' && c != '\n')
232				;
233			ignore_backslash_newline--;
234		}
235		ungetsc(c);
236	}
237	if (source->flags & SF_ALIAS) {
238		/* trailing ' ' in alias definition */
239		source->flags &= ~SF_ALIAS;
240		cf |= ALIAS;
241	}
242
243	/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244	statep->type = state;
245
246	/* check for here string */
247	if (state == SHEREDELIM) {
248		c = getsc();
249		if (c == '<') {
250			state = SHERESTRING;
251			while ((c = getsc()) == ' ' || c == '\t')
252				;
253			ungetsc(c);
254			c = '<';
255			goto accept_nonword;
256		}
257		ungetsc(c);
258	}
259
260	/* collect non-special or quoted characters to form word */
261	while (!((c = getsc()) == 0 ||
262	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
263	    ctype(c, C_LEX1)))) {
264 accept_nonword:
265		Xcheck(ws, wp);
266		switch (state) {
267		case SADELIM:
268			if (c == '(')
269				statep->nparen++;
270			else if (c == ')')
271				statep->nparen--;
272			else if (statep->nparen == 0 &&
273			    (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
274				*wp++ = ADELIM;
275				*wp++ = c;
276				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
277					POP_STATE();
278				if (c == /*{*/ '}')
279					POP_STATE();
280				break;
281			}
282			/* FALLTHROUGH */
283		case SBASE:
284			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
285				/* temporary */
286				*wp = EOS;
287				if (is_wdvarname(Xstring(ws, wp), false)) {
288					char *p, *tmp;
289
290					if (arraysub(&tmp)) {
291						*wp++ = CHAR;
292						*wp++ = c;
293						for (p = tmp; *p; ) {
294							Xcheck(ws, wp);
295							*wp++ = CHAR;
296							*wp++ = *p++;
297						}
298						afree(tmp, ATEMP);
299						break;
300					} else {
301						Source *s;
302
303						s = pushs(SREREAD,
304						    source->areap);
305						s->start = s->str =
306						    s->u.freeme = tmp;
307						s->next = source;
308						source = s;
309					}
310				}
311				*wp++ = CHAR;
312				*wp++ = c;
313				break;
314			}
315			/* FALLTHROUGH */
316 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
317			if (c == '*' || c == '@' || c == '+' || c == '?' ||
318			    c == '!') {
319				c2 = getsc();
320				if (c2 == '(' /*)*/ ) {
321					*wp++ = OPAT;
322					*wp++ = c;
323					PUSH_STATE(SPATTERN);
324					break;
325				}
326				ungetsc(c2);
327			}
328			/* FALLTHROUGH */
329 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
330			switch (c) {
331			case '\\':
332 getsc_qchar:
333				if ((c = getsc())) {
334					/* trailing \ is lost */
335					*wp++ = QCHAR;
336					*wp++ = c;
337				}
338				break;
339			case '\'':
340 open_ssquote:
341				*wp++ = OQUOTE;
342				ignore_backslash_newline++;
343				PUSH_STATE(SSQUOTE);
344				break;
345			case '"':
346 open_sdquote:
347				*wp++ = OQUOTE;
348				PUSH_STATE(SDQUOTE);
349				break;
350			default:
351				goto Subst;
352			}
353			break;
354
355 Subst:
356			switch (c) {
357			case '\\':
358				c = getsc();
359				switch (c) {
360				case '"':
361					if ((cf & HEREDOC))
362						goto heredocquote;
363					/* FALLTHROUGH */
364				case '\\':
365				case '$': case '`':
366 store_qchar:
367					*wp++ = QCHAR;
368					*wp++ = c;
369					break;
370				default:
371 heredocquote:
372					Xcheck(ws, wp);
373					if (c) {
374						/* trailing \ is lost */
375						*wp++ = CHAR;
376						*wp++ = '\\';
377						*wp++ = CHAR;
378						*wp++ = c;
379					}
380					break;
381				}
382				break;
383			case '$':
384 subst_dollar:
385				c = getsc();
386				if (c == '(') /*)*/ {
387					c = getsc();
388					if (c == '(') /*)*/ {
389						*wp++ = EXPRSUB;
390						PUSH_STATE(SASPAREN);
391						statep->nparen = 2;
392						PUSH_SRETRACE();
393						*retrace_info->xp++ = '(';
394					} else {
395						ungetsc(c);
396 subst_command:
397						sp = yyrecursive();
398						cz = strlen(sp) + 1;
399						XcheckN(ws, wp, cz);
400						*wp++ = COMSUB;
401						memcpy(wp, sp, cz);
402						wp += cz;
403					}
404				} else if (c == '{') /*}*/ {
405					*wp++ = OSUBST;
406					*wp++ = '{'; /*}*/
407					wp = get_brace_var(&ws, wp);
408					c = getsc();
409					/* allow :# and :% (ksh88 compat) */
410					if (c == ':') {
411						*wp++ = CHAR;
412						*wp++ = c;
413						c = getsc();
414						if (c == ':') {
415							*wp++ = CHAR;
416							*wp++ = '0';
417							*wp++ = ADELIM;
418							*wp++ = ':';
419							PUSH_STATE(SBRACE);
420							PUSH_STATE(SADELIM);
421							statep->ls_adelim.delimiter = ':';
422							statep->ls_adelim.num = 1;
423							statep->nparen = 0;
424							break;
425						} else if (ksh_isdigit(c) ||
426						    c == '('/*)*/ || c == ' ' ||
427						    /*XXX what else? */
428						    c == '$') {
429							/* substring subst. */
430							if (c != ' ') {
431								*wp++ = CHAR;
432								*wp++ = ' ';
433							}
434							ungetsc(c);
435							PUSH_STATE(SBRACE);
436							PUSH_STATE(SADELIM);
437							statep->ls_adelim.delimiter = ':';
438							statep->ls_adelim.num = 2;
439							statep->nparen = 0;
440							break;
441						}
442					} else if (c == '/') {
443						*wp++ = CHAR;
444						*wp++ = c;
445						if ((c = getsc()) == '/') {
446							*wp++ = ADELIM;
447							*wp++ = c;
448						} else
449							ungetsc(c);
450						PUSH_STATE(SBRACE);
451						PUSH_STATE(SADELIM);
452						statep->ls_adelim.delimiter = '/';
453						statep->ls_adelim.num = 1;
454						statep->nparen = 0;
455						break;
456					}
457					/*
458					 * If this is a trim operation,
459					 * treat (,|,) specially in STBRACE.
460					 */
461					if (ctype(c, C_SUBOP2)) {
462						ungetsc(c);
463						if (Flag(FSH))
464							PUSH_STATE(STBRACEBOURNE);
465						else
466							PUSH_STATE(STBRACEKORN);
467					} else {
468						ungetsc(c);
469						if (state == SDQUOTE)
470							PUSH_STATE(SQBRACE);
471						else
472							PUSH_STATE(SBRACE);
473					}
474				} else if (ksh_isalphx(c)) {
475					*wp++ = OSUBST;
476					*wp++ = 'X';
477					do {
478						Xcheck(ws, wp);
479						*wp++ = c;
480						c = getsc();
481					} while (ksh_isalnux(c));
482					*wp++ = '\0';
483					*wp++ = CSUBST;
484					*wp++ = 'X';
485					ungetsc(c);
486				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
487					Xcheck(ws, wp);
488					*wp++ = OSUBST;
489					*wp++ = 'X';
490					*wp++ = c;
491					*wp++ = '\0';
492					*wp++ = CSUBST;
493					*wp++ = 'X';
494				} else if (c == '\'' && (state == SBASE)) {
495					/* XXX which other states are valid? */
496					*wp++ = OQUOTE;
497					ignore_backslash_newline++;
498					PUSH_STATE(SEQUOTE);
499					statep->ls_bool = false;
500					break;
501				} else if (c == '"' && (state == SBASE)) {
502					/* XXX which other states are valid? */
503					goto DEQUOTE;
504				} else {
505					*wp++ = CHAR;
506					*wp++ = '$';
507 DEQUOTE:
508					ungetsc(c);
509				}
510				break;
511			case '`':
512 subst_gravis:
513				PUSH_STATE(SBQUOTE);
514				*wp++ = COMSUB;
515				/*
516				 * Need to know if we are inside double quotes
517				 * since sh/AT&T-ksh translate the \" to " in
518				 * "`...\"...`".
519				 * This is not done in POSIX mode (section
520				 * 3.2.3, Double Quotes: "The backquote shall
521				 * retain its special meaning introducing the
522				 * other form of command substitution (see
523				 * 3.6.3). The portion of the quoted string
524				 * from the initial backquote and the
525				 * characters up to the next backquote that
526				 * is not preceded by a backslash (having
527				 * escape characters removed) defines that
528				 * command whose output replaces `...` when
529				 * the word is expanded."
530				 * Section 3.6.3, Command Substitution:
531				 * "Within the backquoted style of command
532				 * substitution, backslash shall retain its
533				 * literal meaning, except when followed by
534				 * $ ` \.").
535				 */
536				statep->ls_bool = false;
537				s2 = statep;
538				base = state_info.base;
539				while (/* CONSTCOND */ 1) {
540					for (; s2 != base; s2--) {
541						if (s2->type == SDQUOTE) {
542							statep->ls_bool = true;
543							break;
544						}
545					}
546					if (s2 != base)
547						break;
548					if (!(s2 = s2->ls_base))
549						break;
550					base = s2-- - STATE_BSIZE;
551				}
552				break;
553			case QCHAR:
554				if (cf & LQCHAR) {
555					*wp++ = QCHAR;
556					*wp++ = getsc();
557					break;
558				}
559				/* FALLTHROUGH */
560			default:
561 store_char:
562				*wp++ = CHAR;
563				*wp++ = c;
564			}
565			break;
566
567		case SEQUOTE:
568			if (c == '\'') {
569				POP_STATE();
570				*wp++ = CQUOTE;
571				ignore_backslash_newline--;
572			} else if (c == '\\') {
573				if ((c2 = unbksl(true, s_get, s_put)) == -1)
574					c2 = s_get();
575				if (c2 == 0)
576					statep->ls_bool = true;
577				if (!statep->ls_bool) {
578					char ts[4];
579
580					if ((unsigned int)c2 < 0x100) {
581						*wp++ = QCHAR;
582						*wp++ = c2;
583					} else {
584						cz = utf_wctomb(ts, c2 - 0x100);
585						ts[cz] = 0;
586						for (cz = 0; ts[cz]; ++cz) {
587							*wp++ = QCHAR;
588							*wp++ = ts[cz];
589						}
590					}
591				}
592			} else if (!statep->ls_bool) {
593				*wp++ = QCHAR;
594				*wp++ = c;
595			}
596			break;
597
598		case SSQUOTE:
599			if (c == '\'') {
600				POP_STATE();
601				*wp++ = CQUOTE;
602				ignore_backslash_newline--;
603			} else {
604				*wp++ = QCHAR;
605				*wp++ = c;
606			}
607			break;
608
609		case SDQUOTE:
610			if (c == '"') {
611				POP_STATE();
612				*wp++ = CQUOTE;
613			} else
614				goto Subst;
615			break;
616
617		/* $(( ... )) */
618		case SASPAREN:
619			if (c == '(')
620				statep->nparen++;
621			else if (c == ')') {
622				statep->nparen--;
623				if (statep->nparen == 1) {
624					/* end of EXPRSUB */
625					POP_SRETRACE();
626					POP_STATE();
627
628					if ((c2 = getsc()) == /*(*/ ')') {
629						cz = strlen(sp) - 2;
630						XcheckN(ws, wp, cz);
631						memcpy(wp, sp + 1, cz);
632						wp += cz;
633						afree(sp, ATEMP);
634						*wp++ = '\0';
635						break;
636					} else {
637						Source *s;
638
639						ungetsc(c2);
640						/*
641						 * mismatched parenthesis -
642						 * assume we were really
643						 * parsing a $(...) expression
644						 */
645						--wp;
646						s = pushs(SREREAD,
647						    source->areap);
648						s->start = s->str =
649						    s->u.freeme = sp;
650						s->next = source;
651						source = s;
652						goto subst_command;
653					}
654				}
655			}
656			/* reuse existing state machine */
657			goto Sbase2;
658
659		case SQBRACE:
660			if (c == '\\') {
661				/*
662				 * perform POSIX "quote removal" if the back-
663				 * slash is "special", i.e. same cases as the
664				 * {case '\\':} in Subst: plus closing brace;
665				 * in mksh code "quote removal" on '\c' means
666				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
667				 * emitted (in heredocquote:)
668				 */
669				if ((c = getsc()) == '"' || c == '\\' ||
670				    c == '$' || c == '`' || c == /*{*/'}')
671					goto store_qchar;
672				goto heredocquote;
673			}
674			goto common_SQBRACE;
675
676		case SBRACE:
677			if (c == '\'')
678				goto open_ssquote;
679			else if (c == '\\')
680				goto getsc_qchar;
681 common_SQBRACE:
682			if (c == '"')
683				goto open_sdquote;
684			else if (c == '$')
685				goto subst_dollar;
686			else if (c == '`')
687				goto subst_gravis;
688			else if (c != /*{*/ '}')
689				goto store_char;
690			POP_STATE();
691			*wp++ = CSUBST;
692			*wp++ = /*{*/ '}';
693			break;
694
695		/* Same as SBASE, except (,|,) treated specially */
696		case STBRACEKORN:
697			if (c == '|')
698				*wp++ = SPAT;
699			else if (c == '(') {
700				*wp++ = OPAT;
701				/* simile for @ */
702				*wp++ = ' ';
703				PUSH_STATE(SPATTERN);
704			} else /* FALLTHROUGH */
705		case STBRACEBOURNE:
706			  if (c == /*{*/ '}') {
707				POP_STATE();
708				*wp++ = CSUBST;
709				*wp++ = /*{*/ '}';
710			} else
711				goto Sbase1;
712			break;
713
714		case SBQUOTE:
715			if (c == '`') {
716				*wp++ = 0;
717				POP_STATE();
718			} else if (c == '\\') {
719				switch (c = getsc()) {
720				case 0:
721					/* trailing \ is lost */
722					break;
723				case '\\':
724				case '$': case '`':
725					*wp++ = c;
726					break;
727				case '"':
728					if (statep->ls_bool) {
729						*wp++ = c;
730						break;
731					}
732					/* FALLTHROUGH */
733				default:
734					*wp++ = '\\';
735					*wp++ = c;
736					break;
737				}
738			} else
739				*wp++ = c;
740			break;
741
742		/* ONEWORD */
743		case SWORD:
744			goto Subst;
745
746		/* LETEXPR: (( ... )) */
747		case SLETPAREN:
748			if (c == /*(*/ ')') {
749				if (statep->nparen > 0)
750					--statep->nparen;
751				else if ((c2 = getsc()) == /*(*/ ')') {
752					c = 0;
753					*wp++ = CQUOTE;
754					goto Done;
755				} else {
756					Source *s;
757
758					ungetsc(c2);
759					/*
760					 * mismatched parenthesis -
761					 * assume we were really
762					 * parsing a (...) expression
763					 */
764					*wp = EOS;
765					sp = Xstring(ws, wp);
766					dp = wdstrip(sp, WDS_KEEPQ);
767					s = pushs(SREREAD, source->areap);
768					s->start = s->str = s->u.freeme = dp;
769					s->next = source;
770					source = s;
771					return ('('/*)*/);
772				}
773			} else if (c == '(')
774				/*
775				 * parentheses inside quotes and
776				 * backslashes are lost, but AT&T ksh
777				 * doesn't count them either
778				 */
779				++statep->nparen;
780			goto Sbase2;
781
782		/* <<< delimiter */
783		case SHERESTRING:
784			if (c == '\\') {
785				c = getsc();
786				if (c) {
787					/* trailing \ is lost */
788					*wp++ = QCHAR;
789					*wp++ = c;
790				}
791			} else if (c == '$') {
792				if ((c2 = getsc()) == '\'') {
793					PUSH_STATE(SEQUOTE);
794					statep->ls_bool = false;
795					goto sherestring_quoted;
796				} else if (c2 == '"')
797					goto sherestring_dquoted;
798				ungetsc(c2);
799				goto sherestring_regular;
800			} else if (c == '\'') {
801				PUSH_STATE(SSQUOTE);
802 sherestring_quoted:
803				*wp++ = OQUOTE;
804				ignore_backslash_newline++;
805			} else if (c == '"') {
806 sherestring_dquoted:
807				state = statep->type = SHEREDQUOTE;
808				*wp++ = OQUOTE;
809				/* just don't IFS split; no quoting mode */
810			} else {
811 sherestring_regular:
812				*wp++ = CHAR;
813				*wp++ = c;
814			}
815			break;
816
817		/* <<,<<- delimiter */
818		case SHEREDELIM:
819			/*
820			 * XXX chuck this state (and the next) - use
821			 * the existing states ($ and \`...` should be
822			 * stripped of their specialness after the
823			 * fact).
824			 */
825			/*
826			 * here delimiters need a special case since
827			 * $ and `...` are not to be treated specially
828			 */
829			if (c == '\\') {
830				c = getsc();
831				if (c) {
832					/* trailing \ is lost */
833					*wp++ = QCHAR;
834					*wp++ = c;
835				}
836			} else if (c == '$') {
837				if ((c2 = getsc()) == '\'') {
838					PUSH_STATE(SEQUOTE);
839					statep->ls_bool = false;
840					goto sheredelim_quoted;
841				} else if (c2 == '"')
842					goto sheredelim_dquoted;
843				ungetsc(c2);
844				goto sheredelim_regular;
845			} else if (c == '\'') {
846				PUSH_STATE(SSQUOTE);
847 sheredelim_quoted:
848				*wp++ = OQUOTE;
849				ignore_backslash_newline++;
850			} else if (c == '"') {
851 sheredelim_dquoted:
852				state = statep->type = SHEREDQUOTE;
853				*wp++ = OQUOTE;
854			} else {
855 sheredelim_regular:
856				*wp++ = CHAR;
857				*wp++ = c;
858			}
859			break;
860
861		/* " in <<,<<- delimiter */
862		case SHEREDQUOTE:
863			if (c == '"') {
864				*wp++ = CQUOTE;
865				state = statep->type =
866				    /* dp[1] == '<' means here string */
867				    Xstring(ws, wp)[1] == '<' ?
868				    SHERESTRING : SHEREDELIM;
869			} else {
870				if (c == '\\') {
871					switch (c = getsc()) {
872					case 0:
873						/* trailing \ is lost */
874					case '\\':
875					case '"':
876					case '$':
877					case '`':
878						break;
879					default:
880						*wp++ = CHAR;
881						*wp++ = '\\';
882						break;
883					}
884				}
885				*wp++ = CHAR;
886				*wp++ = c;
887			}
888			break;
889
890		/* in *(...|...) pattern (*+?@!) */
891		case SPATTERN:
892			if (c == /*(*/ ')') {
893				*wp++ = CPAT;
894				POP_STATE();
895			} else if (c == '|') {
896				*wp++ = SPAT;
897			} else if (c == '(') {
898				*wp++ = OPAT;
899				/* simile for @ */
900				*wp++ = ' ';
901				PUSH_STATE(SPATTERN);
902			} else
903				goto Sbase1;
904			break;
905		}
906	}
907 Done:
908	Xcheck(ws, wp);
909	if (statep != &states[1])
910		/* XXX figure out what is missing */
911		yyerror("no closing quote\n");
912
913	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
914	if (state == SHEREDELIM || state == SHERESTRING)
915		state = SBASE;
916
917	dp = Xstring(ws, wp);
918	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
919		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
920
921		if (Xlength(ws, wp) == 0)
922			iop->unit = c == '<' ? 0 : 1;
923		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
924			if (dp[c2] != CHAR)
925				goto no_iop;
926			if (!ksh_isdigit(dp[c2 + 1]))
927				goto no_iop;
928			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
929		}
930
931		if (iop->unit >= FDBASE)
932			goto no_iop;
933
934		if (c == '&') {
935			if ((c2 = getsc()) != '>') {
936				ungetsc(c2);
937				goto no_iop;
938			}
939			c = c2;
940			iop->flag = IOBASH;
941		} else
942			iop->flag = 0;
943
944		c2 = getsc();
945		/* <<, >>, <> are ok, >< is not */
946		if (c == c2 || (c == '<' && c2 == '>')) {
947			iop->flag |= c == c2 ?
948			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
949			if (iop->flag == IOHERE) {
950				if ((c2 = getsc()) == '-') {
951					iop->flag |= IOSKIP;
952					c2 = getsc();
953				} else if (c2 == '<')
954					iop->flag |= IOHERESTR;
955				ungetsc(c2);
956				if (c2 == '\n')
957					iop->flag |= IONDELIM;
958			}
959		} else if (c2 == '&')
960			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
961		else {
962			iop->flag |= c == '>' ? IOWRITE : IOREAD;
963			if (c == '>' && c2 == '|')
964				iop->flag |= IOCLOB;
965			else
966				ungetsc(c2);
967		}
968
969		iop->name = NULL;
970		iop->delim = NULL;
971		iop->heredoc = NULL;
972		/* free word */
973		Xfree(ws, wp);
974		yylval.iop = iop;
975		return (REDIR);
976 no_iop:
977		afree(iop, ATEMP);
978	}
979
980	if (wp == dp && state == SBASE) {
981		/* free word */
982		Xfree(ws, wp);
983		/* no word, process LEX1 character */
984		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
985			if ((c2 = getsc()) == c)
986				c = (c == ';') ? BREAK :
987				    (c == '|') ? LOGOR :
988				    (c == '&') ? LOGAND :
989				    /* c == '(' ) */ MDPAREN;
990			else if (c == '|' && c2 == '&')
991				c = COPROC;
992			else if (c == ';' && c2 == '|')
993				c = BRKEV;
994			else if (c == ';' && c2 == '&')
995				c = BRKFT;
996			else
997				ungetsc(c2);
998#ifndef MKSH_SMALL
999			if (c == BREAK) {
1000				if ((c2 = getsc()) == '&')
1001					c = BRKEV;
1002				else
1003					ungetsc(c2);
1004			}
1005#endif
1006		} else if (c == '\n') {
1007			gethere(false);
1008			if (cf & CONTIN)
1009				goto Again;
1010		} else if (c == '\0')
1011			/* need here strings at EOF */
1012			gethere(true);
1013		return (c);
1014	}
1015
1016	/* terminate word */
1017	*wp++ = EOS;
1018	yylval.cp = Xclose(ws, wp);
1019	if (state == SWORD || state == SLETPAREN
1020	    /* XXX ONEWORD? */)
1021		return (LWORD);
1022
1023	/* unget terminator */
1024	ungetsc(c);
1025
1026	/*
1027	 * note: the alias-vs-function code below depends on several
1028	 * interna: starting from here, source->str is not modified;
1029	 * the way getsc() and ungetsc() operate; etc.
1030	 */
1031
1032	/* copy word to unprefixed string ident */
1033	sp = yylval.cp;
1034	dp = ident;
1035	if ((cf & HEREDELIM) && (sp[1] == '<'))
1036		while (dp < ident+IDENT) {
1037			if ((c = *sp++) == CHAR)
1038				*dp++ = *sp++;
1039			else if ((c != OQUOTE) && (c != CQUOTE))
1040				break;
1041		}
1042	else
1043		while (dp < ident+IDENT && (c = *sp++) == CHAR)
1044			*dp++ = *sp++;
1045	/* Make sure the ident array stays '\0' padded */
1046	memset(dp, 0, (ident+IDENT) - dp + 1);
1047	if (c != EOS)
1048		/* word is not unquoted */
1049		*ident = '\0';
1050
1051	if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1052		struct tbl *p;
1053		uint32_t h = hash(ident);
1054
1055		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1056		    (!(cf & ESACONLY) || p->val.i == ESAC ||
1057		    p->val.i == /*{*/ '}')) {
1058			afree(yylval.cp, ATEMP);
1059			return (p->val.i);
1060		}
1061		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1062		    (p->flag & ISSET)) {
1063			/*
1064			 * this still points to the same character as the
1065			 * ungetsc'd terminator from above
1066			 */
1067			const char *cp = source->str;
1068
1069			/* prefer POSIX but not Korn functions over aliases */
1070			while (*cp == ' ' || *cp == '\t')
1071				/*
1072				 * this is like getsc() without skipping
1073				 * over Source boundaries (including not
1074				 * parsing ungetsc'd characters that got
1075				 * pushed into an SREREAD) which is what
1076				 * we want here anyway: find out whether
1077				 * the alias name is followed by a POSIX
1078				 * function definition (only the opening
1079				 * parenthesis is checked though)
1080				 */
1081				++cp;
1082			/* prefer functions over aliases */
1083			if (cp[0] != '(' || cp[1] != ')') {
1084				Source *s = source;
1085
1086				while (s && (s->flags & SF_HASALIAS))
1087					if (s->u.tblp == p)
1088						return (LWORD);
1089					else
1090						s = s->next;
1091				/* push alias expansion */
1092				s = pushs(SALIAS, source->areap);
1093				s->start = s->str = p->val.s;
1094				s->u.tblp = p;
1095				s->flags |= SF_HASALIAS;
1096				s->next = source;
1097				if (source->type == SEOF) {
1098					/* prevent infinite recursion at EOS */
1099					source->u.tblp = p;
1100					source->flags |= SF_HASALIAS;
1101				}
1102				source = s;
1103				afree(yylval.cp, ATEMP);
1104				goto Again;
1105			}
1106		}
1107	}
1108
1109	return (LWORD);
1110}
1111
1112static void
1113gethere(bool iseof)
1114{
1115	struct ioword **p;
1116
1117	for (p = heres; p < herep; p++)
1118		if (iseof && !((*p)->flag & IOHERESTR))
1119			/* only here strings at EOF */
1120			return;
1121		else
1122			readhere(*p);
1123	herep = heres;
1124}
1125
1126/*
1127 * read "<<word" text into temp file
1128 */
1129
1130static void
1131readhere(struct ioword *iop)
1132{
1133	int c;
1134	const char *eof, *eofp;
1135	XString xs;
1136	char *xp;
1137	int xpos;
1138
1139	if (iop->flag & IOHERESTR) {
1140		/* process the here string */
1141		iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1142		xpos = strlen(xp) - 1;
1143		memmove(xp, xp + 1, xpos);
1144		xp[xpos] = '\n';
1145		return;
1146	}
1147
1148	eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1149
1150	if (!(iop->flag & IOEVAL))
1151		ignore_backslash_newline++;
1152
1153	Xinit(xs, xp, 256, ATEMP);
1154
1155 heredoc_read_line:
1156	/* beginning of line */
1157	eofp = eof;
1158	xpos = Xsavepos(xs, xp);
1159	if (iop->flag & IOSKIP) {
1160		/* skip over leading tabs */
1161		while ((c = getsc()) == '\t')
1162			/* nothing */;
1163		goto heredoc_parse_char;
1164	}
1165 heredoc_read_char:
1166	c = getsc();
1167 heredoc_parse_char:
1168	/* compare with here document marker */
1169	if (!*eofp) {
1170		/* end of here document marker, what to do? */
1171		switch (c) {
1172		case /*(*/ ')':
1173			if (!subshell_nesting_level)
1174				/*-
1175				 * not allowed outside $(...) or (...)
1176				 * => mismatch
1177				 */
1178				break;
1179			/* allow $(...) or (...) to close here */
1180			ungetsc(/*(*/ ')');
1181			/* FALLTHROUGH */
1182		case 0:
1183			/*
1184			 * Allow EOF here to commands without trailing
1185			 * newlines (mksh -c '...') will work as well.
1186			 */
1187		case '\n':
1188			/* Newline terminates here document marker */
1189			goto heredoc_found_terminator;
1190		}
1191	} else if (c == *eofp++)
1192		/* store; then read and compare next character */
1193		goto heredoc_store_and_loop;
1194	/* nope, mismatch; read until end of line */
1195	while (c != '\n') {
1196		if (!c)
1197			/* oops, reached EOF */
1198			yyerror("%s '%s' unclosed\n", "here document", eof);
1199		/* store character */
1200		Xcheck(xs, xp);
1201		Xput(xs, xp, c);
1202		/* read next character */
1203		c = getsc();
1204	}
1205	/* we read a newline as last character */
1206 heredoc_store_and_loop:
1207	/* store character */
1208	Xcheck(xs, xp);
1209	Xput(xs, xp, c);
1210	if (c == '\n')
1211		goto heredoc_read_line;
1212	goto heredoc_read_char;
1213
1214 heredoc_found_terminator:
1215	/* jump back to saved beginning of line */
1216	xp = Xrestpos(xs, xp, xpos);
1217	/* terminate, close and store */
1218	Xput(xs, xp, '\0');
1219	iop->heredoc = Xclose(xs, xp);
1220
1221	if (!(iop->flag & IOEVAL))
1222		ignore_backslash_newline--;
1223}
1224
1225void
1226yyerror(const char *fmt, ...)
1227{
1228	va_list va;
1229
1230	/* pop aliases and re-reads */
1231	while (source->type == SALIAS || source->type == SREREAD)
1232		source = source->next;
1233	/* zap pending input */
1234	source->str = null;
1235
1236	error_prefix(true);
1237	va_start(va, fmt);
1238	shf_vfprintf(shl_out, fmt, va);
1239	va_end(va);
1240	errorfz();
1241}
1242
1243/*
1244 * input for yylex with alias expansion
1245 */
1246
1247Source *
1248pushs(int type, Area *areap)
1249{
1250	Source *s;
1251
1252	s = alloc(sizeof(Source), areap);
1253	memset(s, 0, sizeof(Source));
1254	s->type = type;
1255	s->str = null;
1256	s->areap = areap;
1257	if (type == SFILE || type == SSTDIN)
1258		XinitN(s->xs, 256, s->areap);
1259	return (s);
1260}
1261
/*
 * Fetch the next character from the current input source (the global
 * `source'). Whenever the source's string is exhausted (a NUL is read),
 * the source is refilled, switched or popped according to its type and
 * reading continues. Returns the character read, or 0 at end of input.
 */
static int
getsc_uu(void)
{
	Source *s = source;
	int c;

	while ((c = *s->str++) == 0) {
		/* return 0 for EOF by default */
		s->str = NULL;
		switch (s->type) {
		case SEOF:
			/* already at EOF: keep str pointing at `null' */
			s->str = null;
			return (0);

		case SSTDIN:
		case SFILE:
			/* read another line; leaves s->str NULL at EOF */
			getsc_line(s);
			break;

		case SWSTR:
			/* nothing to refill: s->str stays NULL => EOF below */
			break;

		case SSTRING:
			/* nothing to refill: s->str stays NULL => EOF below */
			break;

		case SWORDS:
			/* continue with the next word, then expect a separator */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		case SWORDSEP:
			if (*s->u.strv == NULL) {
				/* no words left: emit a final newline, then EOF */
				s->start = s->str = "\n";
				s->type = SEOF;
			} else {
				/* more words follow: emit a separating space */
				s->start = s->str = " ";
				s->type = SWORDS;
			}
			break;

		case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s &&
			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
				/* pop source stack */
				source = s = s->next;
				/*
				 * Note that this alias ended with a
				 * space, enabling alias expansion on
				 * the following word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/*
				 * At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need to
				 * return the next character. Do this by
				 * temporarily popping the alias to get the
				 * next character and then put it back in the
				 * source list with the SF_ALIASEND flag set.
				 */
				/* pop source stack */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				c = getsc_uu();
				if (c) {
					/*
					 * re-push the alias holding just the
					 * character that was read ahead
					 */
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading EOF twice */
					s->str = NULL;
					break;
				}
			}
			/* retry the read on whichever source is now current */
			continue;

		case SREREAD:
			/* a re-read not backed by ugbuf owns u.freeme */
			if (s->start != s->ugbuf)
				/* yuck */
				afree(s->u.freeme, ATEMP);
			/* pop it and carry on with the source underneath */
			source = s = s->next;
			continue;
		}
		if (s->str == NULL) {
			/* nothing was refilled: turn the source into SEOF */
			s->type = SEOF;
			s->start = s->str = null;
			return ('\0');
		}
		if (s->flags & SF_ECHO) {
			/* echo the newly obtained input to shl_out */
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return (c);
}
1366
/*
 * Read the next line of input for source s into its buffer s->xs and
 * point s->start/s->str at it. On end of input s->str is set to NULL
 * (and, for SFILE sources, the underlying shf is closed).
 *
 * For an interactive stdin source this also prints the prompt or runs
 * the line editor, applies the TMOUT alarm around the read when the
 * source is a tty, rewrites a leading '!' at the PS1 prompt, saves the
 * line to the history and finally switches the prompt to PS2.
 */
static void
getsc_line(Source *s)
{
	char *xp = Xstring(s->xs, xp), *cp;
	/* only stdin read while FTALKING is set counts as interactive */
	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

	/* arm the input timeout for the duration of the read */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
	if (interactive)
		change_winsz();
	/* with an editing mode flag set on a tty, read through x_read() */
	if (have_tty && (
#if !MKSH_S_NOVI
	    Flag(FVI) ||
#endif
	    Flag(FEMACS) || Flag(FGMACS))) {
		int nread;

		nread = x_read(xp, LINE);
		if (nread < 0)
			/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	} else {
		/* plain read: show the prompt or count the line ourselves */
		if (interactive)
			pprompt(prompt, 0);
		else
			s->line++;

		while (/* CONSTCOND */ 1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps, then retry */
			if (!p && shf_error(s->u.shf) &&
			    shf_errno(s->u.shf) == EINTR) {
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop on EOF/error or once a full line was read */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			/* move past NUL so doubling works... */
			xp++;
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			/* ...and move back again */
			xp--;
		}
		/*
		 * flush any unwanted input so other programs/builtins
		 * can read it. Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc.
		 * TODO: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/*
	 * XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
	/* disarm the input timeout again */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
	cp = Xstring(s->xs, xp);
#ifndef MKSH_SMALL
	/*
	 * A line starting with '!' typed at the PS1 prompt gets the
	 * Tfc_e_dash string put in front, with the '!' turned into a space.
	 */
	if (interactive && *cp == '!' && cur_prompt == PS1) {
		int linelen;

		linelen = Xlength(s->xs, xp);
		XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
		/* reload after potential realloc */
		cp = Xstring(s->xs, xp);
		/* change initial '!' into space */
		*cp = ' ';
		/* NUL terminate the current string */
		*xp = '\0';
		/* move the actual string forward */
		memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
		xp += Zfc_e_dash;
		/* prepend it with "fc -e -" */
		memcpy(cp, Tfc_e_dash, Zfc_e_dash);
	}
#endif
	s->start = s->str = cp;
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) {
		/* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive && *s->str &&
	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
		/*
		 * record the line in the history; at PS1 a line whose first
		 * character matches the ctype() test above is not saved
		 */
		histsave(&s->line, s->str, true, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
	} else if (interactive && cur_prompt == PS1) {
		/* line made up only of C_IFSWS characters: call histsync() */
		cp = Xstring(s->xs, xp);
		while (*cp && ctype(*cp, C_IFSWS))
			++cp;
		if (!*cp)
			histsync();
#endif
	}
	/* any further line of this command is prompted for with PS2 */
	if (interactive)
		set_prompt(PS2, NULL);
}
1485
1486void
1487set_prompt(int to, Source *s)
1488{
1489	cur_prompt = to;
1490
1491	switch (to) {
1492	/* command */
1493	case PS1:
1494		/*
1495		 * Substitute ! and !! here, before substitutions are done
1496		 * so ! in expanded variables are not expanded.
1497		 * NOTE: this is not what AT&T ksh does (it does it after
1498		 * substitutions, POSIX doesn't say which is to be done.
1499		 */
1500		{
1501			struct shf *shf;
1502			char * volatile ps1;
1503			Area *saved_atemp;
1504
1505			ps1 = str_val(global("PS1"));
1506			shf = shf_sopen(NULL, strlen(ps1) * 2,
1507			    SHF_WR | SHF_DYNAMIC, NULL);
1508			while (*ps1)
1509				if (*ps1 != '!' || *++ps1 == '!')
1510					shf_putchar(*ps1++, shf);
1511				else
1512					shf_fprintf(shf, "%d",
1513						s ? s->line + 1 : 0);
1514			ps1 = shf_sclose(shf);
1515			saved_atemp = ATEMP;
1516			newenv(E_ERRH);
1517			if (sigsetjmp(e->jbuf, 0)) {
1518				prompt = safe_prompt;
1519				/*
1520				 * Don't print an error - assume it has already
1521				 * been printed. Reason is we may have forked
1522				 * to run a command and the child may be
1523				 * unwinding its stack through this code as it
1524				 * exits.
1525				 */
1526			} else {
1527				char *cp = substitute(ps1, 0);
1528				strdupx(prompt, cp, saved_atemp);
1529			}
1530			quitenv(NULL);
1531		}
1532		break;
1533	/* command continuation */
1534	case PS2:
1535		prompt = str_val(global("PS2"));
1536		break;
1537	}
1538}
1539
1540static int
1541dopprompt(const char *cp, int ntruncate, bool doprint)
1542{
1543	int columns = 0, lines = 0, indelimit = 0;
1544	char delimiter = 0;
1545
1546	/*
1547	 * Undocumented AT&T ksh feature:
1548	 * If the second char in the prompt string is \r then the first
1549	 * char is taken to be a non-printing delimiter and any chars
1550	 * between two instances of the delimiter are not considered to
1551	 * be part of the prompt length
1552	 */
1553	if (*cp && cp[1] == '\r') {
1554		delimiter = *cp;
1555		cp += 2;
1556	}
1557	for (; *cp; cp++) {
1558		if (indelimit && *cp != delimiter)
1559			;
1560		else if (*cp == '\n' || *cp == '\r') {
1561			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1562			columns = 0;
1563		} else if (*cp == '\t') {
1564			columns = (columns | 7) + 1;
1565		} else if (*cp == '\b') {
1566			if (columns > 0)
1567				columns--;
1568		} else if (*cp == delimiter)
1569			indelimit = !indelimit;
1570		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1571			const char *cp2;
1572			columns += utf_widthadj(cp, &cp2);
1573			if (doprint && (indelimit ||
1574			    (ntruncate < (x_cols * lines + columns))))
1575				shf_write(cp, cp2 - cp, shl_out);
1576			cp = cp2 - /* loop increment */ 1;
1577			continue;
1578		} else
1579			columns++;
1580		if (doprint && (*cp != delimiter) &&
1581		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1582			shf_putc(*cp, shl_out);
1583	}
1584	if (doprint)
1585		shf_flush(shl_out);
1586	return (x_cols * lines + columns);
1587}
1588
1589
/* Print the prompt cp via dopprompt(), passing ntruncate through. */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1595
/* Return the position computed by dopprompt() for cp, printing nothing. */
int
promptlen(const char *cp)
{
	int len;

	len = dopprompt(cp, 0, false);
	return (len);
}
1601
/*
 * Read the variable part of a ${...} expression (i.e. up to but not
 * including the :[-+?=#%] or close-brace).
 *
 * The characters read are appended at wp, growing the XString *wsp as
 * needed, and are followed by a NUL. The character that ended the
 * variable part is pushed back onto the input. Returns the write
 * pointer just past the NUL.
 */
static char *
get_brace_var(XString *wsp, char *wp)
{
	char c;
	enum parse_state {
		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
		PS_NUMBER, PS_VAR1
	} state = PS_INITIAL;

	while (/* CONSTCOND */ 1) {
		c = getsc();
		/* State machine to figure out where the variable part ends. */
		switch (state) {
		case PS_INITIAL:
			/* a leading '#', '!' or '%' is kept as a prefix */
			if (c == '#' || c == '!' || c == '%') {
				state = PS_SAW_HASH;
				break;
			}
			/* FALLTHROUGH */
		case PS_SAW_HASH:
			/* first character of the name proper decides its kind */
			if (ksh_isalphx(c))
				state = PS_IDENT;
			else if (ksh_isdigit(c))
				state = PS_NUMBER;
			else if (c == '#') {
				if (state == PS_SAW_HASH) {
					char c2;

					/* peek at the character after this '#' */
					c2 = getsc();
					ungetsc(c2);
					if (c2 != '}') {
						/*
						 * NOTE(review): c is pushed
						 * back here and once more at
						 * out: below - confirm the
						 * double unget is intended.
						 */
						ungetsc(c);
						goto out;
					}
				}
				state = PS_VAR1;
			} else if (ctype(c, C_VAR1))
				state = PS_VAR1;
			else
				goto out;
			break;
		case PS_IDENT:
			if (!ksh_isalnux(c)) {
				if (c == '[') {
					char *tmp, *p;

					/* array subscript: copy it in verbatim */
					if (!arraysub(&tmp))
						yyerror("missing ]\n");
					*wp++ = c;
					for (p = tmp; *p; ) {
						Xcheck(*wsp, wp);
						*wp++ = *p++;
					}
					afree(tmp, ATEMP);
					/* the ] */
					c = getsc();
				}
				goto out;
			}
			break;
		case PS_NUMBER:
			/* a numeric name runs for as long as digits follow */
			if (!ksh_isdigit(c))
				goto out;
			break;
		case PS_VAR1:
			/* single-character name: ends right after it */
			goto out;
		}
		/* c belongs to the variable part: store it */
		Xcheck(*wsp, wp);
		*wp++ = c;
	}
 out:
	/* end of variable part */
	*wp++ = '\0';
	/* give the terminating character back to the input */
	ungetsc(c);
	return (wp);
}
1682
1683/*
1684 * Save an array subscript - returns true if matching bracket found, false
1685 * if eof or newline was found.
1686 * (Returned string double null terminated)
1687 */
1688static bool
1689arraysub(char **strp)
1690{
1691	XString ws;
1692	char *wp, c;
1693	/* we are just past the initial [ */
1694	int depth = 1;
1695
1696	Xinit(ws, wp, 32, ATEMP);
1697
1698	do {
1699		c = getsc();
1700		Xcheck(ws, wp);
1701		*wp++ = c;
1702		if (c == '[')
1703			depth++;
1704		else if (c == ']')
1705			depth--;
1706	} while (depth > 0 && c && c != '\n');
1707
1708	*wp++ = '\0';
1709	*strp = Xclose(ws, wp);
1710
1711	return (tobool(depth == 0));
1712}
1713
1714/* Unget a char: handles case when we are already at the start of the buffer */
1715static void
1716ungetsc(int c)
1717{
1718	struct sretrace_info *rp = retrace_info;
1719
1720	if (backslash_skip)
1721		backslash_skip--;
1722	/* Don't unget EOF... */
1723	if (source->str == null && c == '\0')
1724		return;
1725	while (rp) {
1726		if (Xlength(rp->xs, rp->xp))
1727			rp->xp--;
1728		rp = rp->next;
1729	}
1730	ungetsc_(c);
1731}
1732static void
1733ungetsc_(int c)
1734{
1735	if (source->str > source->start)
1736		source->str--;
1737	else {
1738		Source *s;
1739
1740		s = pushs(SREREAD, source->areap);
1741		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1742		s->start = s->str = s->ugbuf;
1743		s->next = source;
1744		source = s;
1745	}
1746}
1747
1748
1749/* Called to get a char that isn't a \newline sequence. */
1750static int
1751getsc_bn(void)
1752{
1753	int c, c2;
1754
1755	if (ignore_backslash_newline)
1756		return (o_getsc_u());
1757
1758	if (backslash_skip == 1) {
1759		backslash_skip = 2;
1760		return (o_getsc_u());
1761	}
1762
1763	backslash_skip = 0;
1764
1765	while (/* CONSTCOND */ 1) {
1766		c = o_getsc_u();
1767		if (c == '\\') {
1768			if ((c2 = o_getsc_u()) == '\n')
1769				/* ignore the \newline; get the next char... */
1770				continue;
1771			ungetsc_(c2);
1772			backslash_skip = 1;
1773		}
1774		return (c);
1775	}
1776}
1777
1778void
1779yyskiputf8bom(void)
1780{
1781	int c;
1782
1783	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1784		ungetsc_(c);
1785		return;
1786	}
1787	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1788		ungetsc_(c);
1789		ungetsc_(0xEF);
1790		return;
1791	}
1792	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1793		ungetsc_(c);
1794		ungetsc_(0xBB);
1795		ungetsc_(0xEF);
1796		return;
1797	}
1798	UTFMODE |= 8;
1799}
1800
1801static Lex_state *
1802push_state_(State_info *si, Lex_state *old_end)
1803{
1804	Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1805
1806	news[0].ls_base = old_end;
1807	si->base = &news[0];
1808	si->end = &news[STATE_BSIZE];
1809	return (&news[1]);
1810}
1811
1812static Lex_state *
1813pop_state_(State_info *si, Lex_state *old_end)
1814{
1815	Lex_state *old_base = si->base;
1816
1817	si->base = old_end->ls_base - STATE_BSIZE;
1818	si->end = old_end->ls_base;
1819
1820	afree(old_base, ATEMP);
1821
1822	return (si->base + STATE_BSIZE - 1);
1823}
1824
/* Function wrapper around getsc(): returns the next input character. */
static int
s_get(void)
{
	int c;

	c = getsc();
	return (c);
}
1830
/* Function wrapper around ungetsc(): pushes c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
	return;
}
1836