lex.c revision 5155f1c7438ef540d7b25eb70aa1639579795b07
1/*	$OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $	*/
2
3/*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5 *	Thorsten Glaser <tg@mirbsd.org>
6 *
7 * Provided that these terms and disclaimer and all copyright notices
8 * are retained or reproduced in an accompanying document, permission
9 * is granted to deal in this work without restriction, including un-
10 * limited rights to use, publicly perform, distribute, sell, modify,
11 * merge, give away, or sublicence.
12 *
13 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
14 * the utmost extent permitted by applicable law, neither express nor
15 * implied; without malicious intent or gross negligence. In no event
16 * may a licensor, author or contributor be held liable for indirect,
17 * direct, other damage, loss, or other issues arising in any way out
18 * of dealing in the work, even if advised of the possibility of such
19 * damage or existence of a defect, except proven that it results out
20 * of said person's immediate fault when using the work as intended.
21 */
22
23#include "sh.h"
24
25__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $");
26
/*
 * states while lexing word
 *
 * One of these values is stored in Lex_state.ls_state (and mirrored in
 * the local variable "state" of yylex()); yylex() switches on it for
 * every input character it reads.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SCSPAREN	8	/* inside $() */
#define SBQUOTE		9	/* inside `` */
#define SASPAREN	10	/* inside $(( )) */
#define SHEREDELIM	11	/* parsing <<,<<- delimiter */
#define SHEREDQUOTE	12	/* parsing " in <<,<<- delimiter */
#define SPATTERN	13	/* parsing *(...|...) pattern (*+?@!) */
#define STBRACE		14	/* parsing ${...[#%]...} */
#define SLETARRAY	15	/* inside =( ), just copy */
#define SADELIM		16	/* like SBASE, looking for delimiter */
#define SHERESTRING	17	/* parsing <<< string */
48
/*
 * One entry of the lexer's state stack: the state code itself plus the
 * bookkeeping that this particular state needs. The first slot of each
 * block of entries is not a real state; its ls_info.base member is used
 * to chain the blocks together.
 */
typedef struct lex_state Lex_state;

/* values for sadelim_info.style */
#define SADELIM_BASH	0
#define SADELIM_MAKE	1

struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* count open parenthesis */
			int csstate;	/* XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* count open parenthesis */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* count open parenthesis */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

#ifndef MKSH_SMALL
		/* =(...) */
		struct sletarray_info {
			int nparen;	/* count open parentheses */
		} u_sletarray;
#endif

		/* ADELIM */
		struct sadelim_info {
			unsigned char nparen;	/* count open parentheses */
			unsigned char style;	/* SADELIM_BASH or SADELIM_MAKE */
			unsigned char delimiter;
			unsigned char num;
			unsigned char flags;	/* ofs. into sadelim_flags[] */
		} u_sadelim;

		/* $'...' */
		struct sequote_info {
			bool got_NUL;	/* ignore rest of string */
		} u_sequote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};

/* shorthands for reaching the per-state members of ls_info */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
#ifndef MKSH_SMALL
#define ls_sletarray	ls_info.u_sletarray
#endif
#define ls_sadelim	ls_info.u_sadelim
#define ls_sequote	ls_info.u_sequote
110
/*
 * Bounds of the block of Lex_state slots currently in use; yylex() points
 * base at its local states[] array and end STATE_BSIZE slots beyond it.
 */
typedef struct {
	Lex_state *base;	/* first slot of the current block */
	Lex_state *end;		/* one past the last slot of the current block */
} State_info;
115
/* forward declarations of the file-local helpers */
static void readhere(struct ioword *);
static int getsc__(void);
static void getsc_line(Source *);
static int getsc_bn(void);
static int s_get(void);
static void s_put(int);
static char *get_brace_var(XString *, char *);
static int arraysub(char **);
static const char *ungetsc(int);
static void gethere(bool);
static Lex_state *push_state_(State_info *, Lex_state *);
static Lex_state *pop_state_(State_info *, Lex_state *);

static int dopprompt(const char *, int, bool);

/* while non-zero, the _getsc() fast path is bypassed in favour of getsc_bn() */
static int backslash_skip;
/*
 * nesting counter: yylex() and readhere() raise it while reading comments,
 * single-quoted text and here documents without IOEVAL, and lower it again
 * afterwards (the code consuming it is outside this part of the file)
 */
static int ignore_backslash_newline;
133
/*
 * optimised getsc_bn(): hand out the next character straight from
 * source->str when it is neither NUL nor a backslash, backslash_skip is
 * zero and SF_FIRST is not set; otherwise call getsc_bn()
 */
#define _getsc()	(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc_bn())
/*
 * optimised getsc__(): hand out the next character straight from
 * source->str when it is not NUL and SF_FIRST is not set; otherwise
 * call getsc__()
 */
#define	_getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
			 ? *source->str++ : getsc__())

#ifdef MKSH_SMALL
/* MKSH_SMALL: wrap the macros in real functions instead of expanding them at every use */
static int getsc(void);
static int getsc_(void);

static int
getsc(void)
{
	return (_getsc());
}

static int
getsc_(void)
{
	return (_getsc_());
}
#else
/* !MKSH_SMALL: use them inline */
#define getsc()		_getsc()
#define getsc_()	_getsc_()
#endif
162
/* number of Lex_state slots per block of the state stack */
#define STATE_BSIZE	32

/*
 * Enter lexer state (s). Expects the locals statep, state_info and state
 * of the enclosing function: advances statep, asks push_state_() for a
 * fresh slot when the current block is exhausted, then records the new
 * state both in the slot and in "state".
 */
#define PUSH_STATE(s)	do {					\
	if (++statep == state_info.end)				\
		statep = push_state_(&state_info, statep);	\
	state = statep->ls_state = (s);				\
} while (0)

/*
 * Leave the current lexer state. Steps statep back, calls pop_state_()
 * when that reaches the first slot of the current block, then reloads
 * "state" from the slot now on top.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_(&state_info, statep);	\
	state = statep->ls_state;				\
} while (0)
176
177/**
178 * Lexical analyser
179 *
180 * tokens are not regular expressions, they are LL(1).
181 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
182 * hence the state stack.
183 */
184
185int
186yylex(int cf)
187{
188	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
189	State_info state_info;
190	int c, c2, state;
191	XString ws;		/* expandable output word */
192	char *wp;		/* output word pointer */
193	char *sp, *dp;
194
195 Again:
196	states[0].ls_state = -1;
197	states[0].ls_info.base = NULL;
198	statep = &states[1];
199	state_info.base = states;
200	state_info.end = &state_info.base[STATE_BSIZE];
201
202	Xinit(ws, wp, 64, ATEMP);
203
204	backslash_skip = 0;
205	ignore_backslash_newline = 0;
206
207	if (cf&ONEWORD)
208		state = SWORD;
209	else if (cf&LETEXPR) {
210		/* enclose arguments in (double) quotes */
211		*wp++ = OQUOTE;
212		state = SLETPAREN;
213		statep->ls_sletparen.nparen = 0;
214#ifndef MKSH_SMALL
215	} else if (cf&LETARRAY) {
216		state = SLETARRAY;
217		statep->ls_sletarray.nparen = 0;
218#endif
219	} else {		/* normal lexing */
220		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
221		while ((c = getsc()) == ' ' || c == '\t')
222			;
223		if (c == '#') {
224			ignore_backslash_newline++;
225			while ((c = getsc()) != '\0' && c != '\n')
226				;
227			ignore_backslash_newline--;
228		}
229		ungetsc(c);
230	}
231	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
232		source->flags &= ~SF_ALIAS;
233		cf |= ALIAS;
234	}
235
236	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
237	statep->ls_state = state;
238
239	/* check for here string */
240	if (state == SHEREDELIM) {
241		c = getsc();
242		if (c == '<') {
243			state = SHERESTRING;
244			while ((c = getsc()) == ' ' || c == '\t')
245				;
246			ungetsc(c);
247			c = '<';
248			goto accept_nonword;
249		}
250		ungetsc(c);
251	}
252
253	/* collect non-special or quoted characters to form word */
254	while (!((c = getsc()) == 0 ||
255	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
256	    ctype(c, C_LEX1)))) {
257 accept_nonword:
258		Xcheck(ws, wp);
259		switch (state) {
260		case SADELIM:
261			if (c == '(')
262				statep->ls_sadelim.nparen++;
263			else if (c == ')')
264				statep->ls_sadelim.nparen--;
265			else if (statep->ls_sadelim.nparen == 0 &&
266			    (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
267				*wp++ = ADELIM;
268				*wp++ = c;
269				if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
270					POP_STATE();
271				if (c == /*{*/ '}')
272					POP_STATE();
273				break;
274			}
275			/* FALLTHROUGH */
276		case SBASE:
277			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
278				*wp = EOS;	/* temporary */
279				if (is_wdvarname(Xstring(ws, wp), false)) {
280					char *p, *tmp;
281
282					if (arraysub(&tmp)) {
283						*wp++ = CHAR;
284						*wp++ = c;
285						for (p = tmp; *p; ) {
286							Xcheck(ws, wp);
287							*wp++ = CHAR;
288							*wp++ = *p++;
289						}
290						afree(tmp, ATEMP);
291						break;
292					} else {
293						Source *s;
294
295						s = pushs(SREREAD,
296						    source->areap);
297						s->start = s->str =
298						    s->u.freeme = tmp;
299						s->next = source;
300						source = s;
301					}
302				}
303				*wp++ = CHAR;
304				*wp++ = c;
305				break;
306			}
307			/* FALLTHROUGH */
308 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
309			if (c == '*' || c == '@' || c == '+' || c == '?' ||
310			    c == '!') {
311				c2 = getsc();
312				if (c2 == '(' /*)*/ ) {
313					*wp++ = OPAT;
314					*wp++ = c;
315					PUSH_STATE(SPATTERN);
316					break;
317				}
318				ungetsc(c2);
319			}
320			/* FALLTHROUGH */
321 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
322			switch (c) {
323			case '\\':
324 getsc_qchar:
325				if ((c = getsc())) {
326					/* trailing \ is lost */
327					*wp++ = QCHAR;
328					*wp++ = c;
329				}
330				break;
331			case '\'':
332 open_ssquote:
333				*wp++ = OQUOTE;
334				ignore_backslash_newline++;
335				PUSH_STATE(SSQUOTE);
336				break;
337			case '"':
338 open_sdquote:
339				*wp++ = OQUOTE;
340				PUSH_STATE(SDQUOTE);
341				break;
342			default:
343				goto Subst;
344			}
345			break;
346
347 Subst:
348			switch (c) {
349			case '\\':
350				c = getsc();
351				switch (c) {
352				case '"':
353					if ((cf & HEREDOC))
354						goto heredocquote;
355					/* FALLTHROUGH */
356				case '\\':
357				case '$': case '`':
358 store_qchar:
359					*wp++ = QCHAR;
360					*wp++ = c;
361					break;
362				default:
363 heredocquote:
364					Xcheck(ws, wp);
365					if (c) {
366						/* trailing \ is lost */
367						*wp++ = CHAR;
368						*wp++ = '\\';
369						*wp++ = CHAR;
370						*wp++ = c;
371					}
372					break;
373				}
374				break;
375			case '$':
376 subst_dollar:
377				c = getsc();
378				if (c == '(') /*)*/ {
379					c = getsc();
380					if (c == '(') /*)*/ {
381						PUSH_STATE(SASPAREN);
382						statep->ls_sasparen.nparen = 2;
383						statep->ls_sasparen.start =
384						    Xsavepos(ws, wp);
385						*wp++ = EXPRSUB;
386					} else {
387						ungetsc(c);
388						PUSH_STATE(SCSPAREN);
389						statep->ls_scsparen.nparen = 1;
390						statep->ls_scsparen.csstate = 0;
391						*wp++ = COMSUB;
392					}
393				} else if (c == '{') /*}*/ {
394					*wp++ = OSUBST;
395					*wp++ = '{'; /*}*/
396					wp = get_brace_var(&ws, wp);
397					c = getsc();
398					/* allow :# and :% (ksh88 compat) */
399					if (c == ':') {
400						*wp++ = CHAR;
401						*wp++ = c;
402						c = getsc();
403						if (c == ':') {
404							*wp++ = CHAR;
405							*wp++ = '0';
406							*wp++ = ADELIM;
407							*wp++ = ':';
408							PUSH_STATE(SBRACE);
409							PUSH_STATE(SADELIM);
410							statep->ls_sadelim.style = SADELIM_BASH;
411							statep->ls_sadelim.delimiter = ':';
412							statep->ls_sadelim.num = 1;
413							statep->ls_sadelim.nparen = 0;
414							break;
415						} else if (ksh_isdigit(c) ||
416						    c == '('/*)*/ || c == ' ' ||
417						    c == '$' /* XXX what else? */) {
418							/* substring subst. */
419							if (c != ' ') {
420								*wp++ = CHAR;
421								*wp++ = ' ';
422							}
423							ungetsc(c);
424							PUSH_STATE(SBRACE);
425							PUSH_STATE(SADELIM);
426							statep->ls_sadelim.style = SADELIM_BASH;
427							statep->ls_sadelim.delimiter = ':';
428							statep->ls_sadelim.num = 2;
429							statep->ls_sadelim.nparen = 0;
430							break;
431						}
432					} else if (c == '/') {
433						*wp++ = CHAR;
434						*wp++ = c;
435						if ((c = getsc()) == '/') {
436							*wp++ = ADELIM;
437							*wp++ = c;
438						} else
439							ungetsc(c);
440						PUSH_STATE(SBRACE);
441						PUSH_STATE(SADELIM);
442						statep->ls_sadelim.style = SADELIM_BASH;
443						statep->ls_sadelim.delimiter = '/';
444						statep->ls_sadelim.num = 1;
445						statep->ls_sadelim.nparen = 0;
446						break;
447					}
448					/* If this is a trim operation,
449					 * treat (,|,) specially in STBRACE.
450					 */
451					if (ctype(c, C_SUBOP2)) {
452						ungetsc(c);
453						PUSH_STATE(STBRACE);
454					} else {
455						ungetsc(c);
456						if (state == SDQUOTE)
457							PUSH_STATE(SQBRACE);
458						else
459							PUSH_STATE(SBRACE);
460					}
461				} else if (ksh_isalphx(c)) {
462					*wp++ = OSUBST;
463					*wp++ = 'X';
464					do {
465						Xcheck(ws, wp);
466						*wp++ = c;
467						c = getsc();
468					} while (ksh_isalnux(c));
469					*wp++ = '\0';
470					*wp++ = CSUBST;
471					*wp++ = 'X';
472					ungetsc(c);
473				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
474					Xcheck(ws, wp);
475					*wp++ = OSUBST;
476					*wp++ = 'X';
477					*wp++ = c;
478					*wp++ = '\0';
479					*wp++ = CSUBST;
480					*wp++ = 'X';
481				} else if (c == '\'' && (state == SBASE)) {
482					/* XXX which other states are valid? */
483					*wp++ = OQUOTE;
484					ignore_backslash_newline++;
485					PUSH_STATE(SEQUOTE);
486					statep->ls_sequote.got_NUL = false;
487					break;
488				} else {
489					*wp++ = CHAR;
490					*wp++ = '$';
491					ungetsc(c);
492				}
493				break;
494			case '`':
495 subst_gravis:
496				PUSH_STATE(SBQUOTE);
497				*wp++ = COMSUB;
498				/* Need to know if we are inside double quotes
499				 * since sh/AT&T-ksh translate the \" to " in
500				 * "`...\"...`".
501				 * This is not done in POSIX mode (section
502				 * 3.2.3, Double Quotes: "The backquote shall
503				 * retain its special meaning introducing the
504				 * other form of command substitution (see
505				 * 3.6.3). The portion of the quoted string
506				 * from the initial backquote and the
507				 * characters up to the next backquote that
508				 * is not preceded by a backslash (having
509				 * escape characters removed) defines that
510				 * command whose output replaces `...` when
511				 * the word is expanded."
512				 * Section 3.6.3, Command Substitution:
513				 * "Within the backquoted style of command
514				 * substitution, backslash shall retain its
515				 * literal meaning, except when followed by
516				 * $ ` \.").
517				 */
518				statep->ls_sbquote.indquotes = 0;
519				s2 = statep;
520				base = state_info.base;
521				while (1) {
522					for (; s2 != base; s2--) {
523						if (s2->ls_state == SDQUOTE) {
524							statep->ls_sbquote.indquotes = 1;
525							break;
526						}
527					}
528					if (s2 != base)
529						break;
530					if (!(s2 = s2->ls_info.base))
531						break;
532					base = s2-- - STATE_BSIZE;
533				}
534				break;
535			case QCHAR:
536				if (cf & LQCHAR) {
537					*wp++ = QCHAR;
538					*wp++ = getsc();
539					break;
540				}
541				/* FALLTHROUGH */
542			default:
543 store_char:
544				*wp++ = CHAR;
545				*wp++ = c;
546			}
547			break;
548
549		case SEQUOTE:
550			if (c == '\'') {
551				POP_STATE();
552				*wp++ = CQUOTE;
553				ignore_backslash_newline--;
554			} else if (c == '\\') {
555				if ((c2 = unbksl(true, s_get, s_put)) == -1)
556					c2 = s_get();
557				if (c2 == 0)
558					statep->ls_sequote.got_NUL = true;
559				if (!statep->ls_sequote.got_NUL) {
560					char ts[4];
561
562					if ((unsigned int)c2 < 0x100) {
563						*wp++ = QCHAR;
564						*wp++ = c2;
565					} else {
566						c = utf_wctomb(ts, c2 - 0x100);
567						ts[c] = 0;
568						for (c = 0; ts[c]; ++c) {
569							*wp++ = QCHAR;
570							*wp++ = ts[c];
571						}
572					}
573				}
574			} else if (!statep->ls_sequote.got_NUL) {
575				*wp++ = QCHAR;
576				*wp++ = c;
577			}
578			break;
579
580		case SSQUOTE:
581			if (c == '\'') {
582				POP_STATE();
583				*wp++ = CQUOTE;
584				ignore_backslash_newline--;
585			} else {
586				*wp++ = QCHAR;
587				*wp++ = c;
588			}
589			break;
590
591		case SDQUOTE:
592			if (c == '"') {
593				POP_STATE();
594				*wp++ = CQUOTE;
595			} else
596				goto Subst;
597			break;
598
599		case SCSPAREN:	/* $( ... ) */
600			/* todo: deal with $(...) quoting properly
601			 * kludge to partly fake quoting inside $(...): doesn't
602			 * really work because nested $(...) or ${...} inside
603			 * double quotes aren't dealt with.
604			 */
605			switch (statep->ls_scsparen.csstate) {
606			case 0:	/* normal */
607				switch (c) {
608				case '(':
609					statep->ls_scsparen.nparen++;
610					break;
611				case ')':
612					statep->ls_scsparen.nparen--;
613					break;
614				case '\\':
615					statep->ls_scsparen.csstate = 1;
616					break;
617				case '"':
618					statep->ls_scsparen.csstate = 2;
619					break;
620				case '\'':
621					statep->ls_scsparen.csstate = 4;
622					ignore_backslash_newline++;
623					break;
624				}
625				break;
626
627			case 1:	/* backslash in normal mode */
628			case 3:	/* backslash in double quotes */
629				--statep->ls_scsparen.csstate;
630				break;
631
632			case 2:	/* double quotes */
633				if (c == '"')
634					statep->ls_scsparen.csstate = 0;
635				else if (c == '\\')
636					statep->ls_scsparen.csstate = 3;
637				break;
638
639			case 4:	/* single quotes */
640				if (c == '\'') {
641					statep->ls_scsparen.csstate = 0;
642					ignore_backslash_newline--;
643				}
644				break;
645			}
646			if (statep->ls_scsparen.nparen == 0) {
647				POP_STATE();
648				*wp++ = 0;	/* end of COMSUB */
649			} else
650				*wp++ = c;
651			break;
652
653		case SASPAREN:	/* $(( ... )) */
654			/* XXX should nest using existing state machine
655			 * (embed "...", $(...), etc.) */
656			if (c == '(')
657				statep->ls_sasparen.nparen++;
658			else if (c == ')') {
659				statep->ls_sasparen.nparen--;
660				if (statep->ls_sasparen.nparen == 1) {
661					/*(*/
662					if ((c2 = getsc()) == ')') {
663						POP_STATE();
664						/* end of EXPRSUB */
665						*wp++ = 0;
666						break;
667					} else {
668						char *s;
669
670						ungetsc(c2);
671						/* mismatched parenthesis -
672						 * assume we were really
673						 * parsing a $(...) expression
674						 */
675						s = Xrestpos(ws, wp,
676						    statep->ls_sasparen.start);
677						memmove(s + 1, s, wp - s);
678						*s++ = COMSUB;
679						*s = '('; /*)*/
680						wp++;
681						statep->ls_scsparen.nparen = 1;
682						statep->ls_scsparen.csstate = 0;
683						state = statep->ls_state =
684						    SCSPAREN;
685					}
686				}
687			}
688			*wp++ = c;
689			break;
690
691		case SQBRACE:
692			if (c == '\\') {
693				/*
694				 * perform POSIX "quote removal" if the back-
695				 * slash is "special", i.e. same cases as the
696				 * {case '\\':} in Subst: plus closing brace;
697				 * in mksh code "quote removal" on '\c' means
698				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
699				 * emitted (in heredocquote:)
700				 */
701				if ((c = getsc()) == '"' || c == '\\' ||
702				    c == '$' || c == '`' || c == /*{*/'}')
703					goto store_qchar;
704				goto heredocquote;
705			}
706			goto common_SQBRACE;
707
708		case SBRACE:
709			if (c == '\'')
710				goto open_ssquote;
711			else if (c == '\\')
712				goto getsc_qchar;
713 common_SQBRACE:
714			if (c == '"')
715				goto open_sdquote;
716			else if (c == '$')
717				goto subst_dollar;
718			else if (c == '`')
719				goto subst_gravis;
720			else if (c != /*{*/ '}')
721				goto store_char;
722			POP_STATE();
723			*wp++ = CSUBST;
724			*wp++ = /*{*/ '}';
725			break;
726
727		case STBRACE:
728			/* Same as SBASE, except (,|,) treated specially */
729			if (c == /*{*/ '}') {
730				POP_STATE();
731				*wp++ = CSUBST;
732				*wp++ = /*{*/ '}';
733			} else if (c == '|') {
734				*wp++ = SPAT;
735			} else if (c == '(') {
736				*wp++ = OPAT;
737				*wp++ = ' ';	/* simile for @ */
738				PUSH_STATE(SPATTERN);
739			} else
740				goto Sbase1;
741			break;
742
743		case SBQUOTE:
744			if (c == '`') {
745				*wp++ = 0;
746				POP_STATE();
747			} else if (c == '\\') {
748				switch (c = getsc()) {
749				case '\\':
750				case '$': case '`':
751					*wp++ = c;
752					break;
753				case '"':
754					if (statep->ls_sbquote.indquotes) {
755						*wp++ = c;
756						break;
757					}
758					/* FALLTHROUGH */
759				default:
760					if (c) {
761						/* trailing \ is lost */
762						*wp++ = '\\';
763						*wp++ = c;
764					}
765					break;
766				}
767			} else
768				*wp++ = c;
769			break;
770
771		case SWORD:	/* ONEWORD */
772			goto Subst;
773
774		case SLETPAREN:	/* LETEXPR: (( ... )) */
775			/*(*/
776			if (c == ')') {
777				if (statep->ls_sletparen.nparen > 0)
778					--statep->ls_sletparen.nparen;
779				else if ((c2 = getsc()) == /*(*/ ')') {
780					c = 0;
781					*wp++ = CQUOTE;
782					goto Done;
783				} else {
784					Source *s;
785
786					ungetsc(c2);
787					/* mismatched parenthesis -
788					 * assume we were really
789					 * parsing a $(...) expression
790					 */
791					*wp = EOS;
792					sp = Xstring(ws, wp);
793					dp = wdstrip(sp, true, false);
794					s = pushs(SREREAD, source->areap);
795					s->start = s->str = s->u.freeme = dp;
796					s->next = source;
797					source = s;
798					return ('('/*)*/);
799				}
800			} else if (c == '(')
801				/* parenthesis inside quotes and backslashes
802				 * are lost, but AT&T ksh doesn't count them
803				 * either
804				 */
805				++statep->ls_sletparen.nparen;
806			goto Sbase2;
807
808#ifndef MKSH_SMALL
809		case SLETARRAY:	/* LETARRAY: =( ... ) */
810			if (c == '('/*)*/)
811				++statep->ls_sletarray.nparen;
812			else if (c == /*(*/')')
813				if (statep->ls_sletarray.nparen-- == 0) {
814					c = 0;
815					goto Done;
816				}
817			*wp++ = CHAR;
818			*wp++ = c;
819			break;
820#endif
821
822		case SHERESTRING:	/* <<< delimiter */
823			if (c == '\\') {
824				c = getsc();
825				if (c) {
826					/* trailing \ is lost */
827					*wp++ = QCHAR;
828					*wp++ = c;
829				}
830				/* invoke quoting mode */
831				Xstring(ws, wp)[0] = QCHAR;
832			} else if (c == '$') {
833				if ((c2 = getsc()) == '\'') {
834					PUSH_STATE(SEQUOTE);
835					statep->ls_sequote.got_NUL = false;
836					goto sherestring_quoted;
837				}
838				ungetsc(c2);
839				goto sherestring_regular;
840			} else if (c == '\'') {
841				PUSH_STATE(SSQUOTE);
842 sherestring_quoted:
843				*wp++ = OQUOTE;
844				ignore_backslash_newline++;
845				/* invoke quoting mode */
846				Xstring(ws, wp)[0] = QCHAR;
847			} else if (c == '"') {
848				state = statep->ls_state = SHEREDQUOTE;
849				*wp++ = OQUOTE;
850				/* just don't IFS split; no quoting mode */
851			} else {
852 sherestring_regular:
853				*wp++ = CHAR;
854				*wp++ = c;
855			}
856			break;
857
858		case SHEREDELIM:	/* <<,<<- delimiter */
859			/* XXX chuck this state (and the next) - use
860			 * the existing states ($ and \`...` should be
861			 * stripped of their specialness after the
862			 * fact).
863			 */
864			/* here delimiters need a special case since
865			 * $ and `...` are not to be treated specially
866			 */
867			if (c == '\\') {
868				c = getsc();
869				if (c) {
870					/* trailing \ is lost */
871					*wp++ = QCHAR;
872					*wp++ = c;
873				}
874			} else if (c == '$') {
875				if ((c2 = getsc()) == '\'') {
876					PUSH_STATE(SEQUOTE);
877					statep->ls_sequote.got_NUL = false;
878					goto sheredelim_quoted;
879				}
880				ungetsc(c2);
881				goto sheredelim_regular;
882			} else if (c == '\'') {
883				PUSH_STATE(SSQUOTE);
884 sheredelim_quoted:
885				*wp++ = OQUOTE;
886				ignore_backslash_newline++;
887			} else if (c == '"') {
888				state = statep->ls_state = SHEREDQUOTE;
889				*wp++ = OQUOTE;
890			} else {
891 sheredelim_regular:
892				*wp++ = CHAR;
893				*wp++ = c;
894			}
895			break;
896
897		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
898			if (c == '"') {
899				*wp++ = CQUOTE;
900				state = statep->ls_state =
901				    /* dp[1] == '<' means here string */
902				    Xstring(ws, wp)[1] == '<' ?
903				    SHERESTRING : SHEREDELIM;
904			} else {
905				if (c == '\\') {
906					switch (c = getsc()) {
907					case '\\': case '"':
908					case '$': case '`':
909						break;
910					default:
911						if (c) {
912							/* trailing \ lost */
913							*wp++ = CHAR;
914							*wp++ = '\\';
915						}
916						break;
917					}
918				}
919				*wp++ = CHAR;
920				*wp++ = c;
921			}
922			break;
923
924		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
925			if ( /*(*/ c == ')') {
926				*wp++ = CPAT;
927				POP_STATE();
928			} else if (c == '|') {
929				*wp++ = SPAT;
930			} else if (c == '(') {
931				*wp++ = OPAT;
932				*wp++ = ' ';	/* simile for @ */
933				PUSH_STATE(SPATTERN);
934			} else
935				goto Sbase1;
936			break;
937		}
938	}
939 Done:
940	Xcheck(ws, wp);
941	if (statep != &states[1])
942		/* XXX figure out what is missing */
943		yyerror("no closing quote\n");
944
945#ifndef MKSH_SMALL
946	if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
947		yyerror("%s: ')' missing\n", T_synerr);
948#endif
949
950	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
951	if (state == SHEREDELIM || state == SHERESTRING)
952		state = SBASE;
953
954	dp = Xstring(ws, wp);
955	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
956		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
957
958		if (Xlength(ws, wp) == 0)
959			iop->unit = c == '<' ? 0 : 1;
960		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
961			if (dp[c2] != CHAR)
962				goto no_iop;
963			if (!ksh_isdigit(dp[c2 + 1]))
964				goto no_iop;
965			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
966		}
967
968		if (iop->unit >= FDBASE)
969			goto no_iop;
970
971		if (c == '&') {
972			if ((c2 = getsc()) != '>') {
973				ungetsc(c2);
974				goto no_iop;
975			}
976			c = c2;
977			iop->flag = IOBASH;
978		} else
979			iop->flag = 0;
980
981		c2 = getsc();
982		/* <<, >>, <> are ok, >< is not */
983		if (c == c2 || (c == '<' && c2 == '>')) {
984			iop->flag |= c == c2 ?
985			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
986			if (iop->flag == IOHERE) {
987				if ((c2 = getsc()) == '-')
988					iop->flag |= IOSKIP;
989				else
990					ungetsc(c2);
991			}
992		} else if (c2 == '&')
993			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
994		else {
995			iop->flag |= c == '>' ? IOWRITE : IOREAD;
996			if (c == '>' && c2 == '|')
997				iop->flag |= IOCLOB;
998			else
999				ungetsc(c2);
1000		}
1001
1002		iop->name = NULL;
1003		iop->delim = NULL;
1004		iop->heredoc = NULL;
1005		Xfree(ws, wp);	/* free word */
1006		yylval.iop = iop;
1007		return (REDIR);
1008 no_iop:
1009		;
1010	}
1011
1012	if (wp == dp && state == SBASE) {
1013		Xfree(ws, wp);	/* free word */
1014		/* no word, process LEX1 character */
1015		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
1016			if ((c2 = getsc()) == c)
1017				c = (c == ';') ? BREAK :
1018				    (c == '|') ? LOGOR :
1019				    (c == '&') ? LOGAND :
1020				    /* c == '(' ) */ MDPAREN;
1021			else if (c == '|' && c2 == '&')
1022				c = COPROC;
1023			else
1024				ungetsc(c2);
1025		} else if (c == '\n') {
1026			gethere(false);
1027			if (cf & CONTIN)
1028				goto Again;
1029		} else if (c == '\0')
1030			/* need here strings at EOF */
1031			gethere(true);
1032		return (c);
1033	}
1034
1035	*wp++ = EOS;		/* terminate word */
1036	yylval.cp = Xclose(ws, wp);
1037	if (state == SWORD || state == SLETPAREN
1038	    /* XXX ONEWORD? */
1039#ifndef MKSH_SMALL
1040	    || state == SLETARRAY
1041#endif
1042	    )
1043		return (LWORD);
1044
1045	/* unget terminator */
1046	ungetsc(c);
1047
1048	/*
1049	 * note: the alias-vs-function code below depends on several
1050	 * interna: starting from here, source->str is not modified;
1051	 * the way getsc() and ungetsc() operate; etc.
1052	 */
1053
1054	/* copy word to unprefixed string ident */
1055	sp = yylval.cp;
1056	dp = ident;
1057	if ((cf & HEREDELIM) && (sp[1] == '<'))
1058		while (dp < ident+IDENT) {
1059			if ((c = *sp++) == CHAR)
1060				*dp++ = *sp++;
1061			else if ((c != OQUOTE) && (c != CQUOTE))
1062				break;
1063		}
1064	else
1065		while (dp < ident+IDENT && (c = *sp++) == CHAR)
1066			*dp++ = *sp++;
1067	/* Make sure the ident array stays '\0' padded */
1068	memset(dp, 0, (ident+IDENT) - dp + 1);
1069	if (c != EOS)
1070		*ident = '\0';	/* word is not unquoted */
1071
1072	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
1073		struct tbl *p;
1074		uint32_t h = hash(ident);
1075
1076		/* { */
1077		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1078		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
1079			afree(yylval.cp, ATEMP);
1080			return (p->val.i);
1081		}
1082		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1083		    (p->flag & ISSET)) {
1084			/*
1085			 * this still points to the same character as the
1086			 * ungetsc'd terminator from above
1087			 */
1088			const char *cp = source->str;
1089
1090			/* prefer POSIX but not Korn functions over aliases */
1091			while (*cp == ' ' || *cp == '\t')
1092				/*
1093				 * this is like getsc() without skipping
1094				 * over Source boundaries (including not
1095				 * parsing ungetsc'd characters that got
1096				 * pushed into an SREREAD) which is what
1097				 * we want here anyway: find out whether
1098				 * the alias name is followed by a POSIX
1099				 * function definition (only the opening
1100				 * parenthesis is checked though)
1101				 */
1102				++cp;
1103			/* prefer functions over aliases */
1104			if (*cp == '(' /*)*/)
1105				/*
1106				 * delete alias upon encountering function
1107				 * definition
1108				 */
1109				ktdelete(p);
1110			else {
1111				Source *s = source;
1112
1113				while (s && (s->flags & SF_HASALIAS))
1114					if (s->u.tblp == p)
1115						return (LWORD);
1116					else
1117						s = s->next;
1118				/* push alias expansion */
1119				s = pushs(SALIAS, source->areap);
1120				s->start = s->str = p->val.s;
1121				s->u.tblp = p;
1122				s->flags |= SF_HASALIAS;
1123				s->next = source;
1124				if (source->type == SEOF) {
1125					/* prevent infinite recursion at EOS */
1126					source->u.tblp = p;
1127					source->flags |= SF_HASALIAS;
1128				}
1129				source = s;
1130				afree(yylval.cp, ATEMP);
1131				goto Again;
1132			}
1133		}
1134	}
1135
1136	return (LWORD);
1137}
1138
1139static void
1140gethere(bool iseof)
1141{
1142	struct ioword **p;
1143
1144	for (p = heres; p < herep; p++)
1145		if (iseof && (*p)->delim[1] != '<')
1146			/* only here strings at EOF */
1147			return;
1148		else
1149			readhere(*p);
1150	herep = heres;
1151}
1152
1153/*
1154 * read "<<word" text into temp file
1155 */
1156
1157static void
1158readhere(struct ioword *iop)
1159{
1160	int c;
1161	char *volatile eof;
1162	char *eofp;
1163	int skiptabs;
1164	XString xs;
1165	char *xp;
1166	int xpos;
1167
1168	if (iop->delim[1] == '<') {
1169		/* process the here string */
1170		xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
1171		c = strlen(xp) - 1;
1172		memmove(xp, xp + 1, c);
1173		xp[c] = '\n';
1174		return;
1175	}
1176
1177	eof = evalstr(iop->delim, 0);
1178
1179	if (!(iop->flag & IOEVAL))
1180		ignore_backslash_newline++;
1181
1182	Xinit(xs, xp, 256, ATEMP);
1183
1184	for (;;) {
1185		eofp = eof;
1186		skiptabs = iop->flag & IOSKIP;
1187		xpos = Xsavepos(xs, xp);
1188		while ((c = getsc()) != 0) {
1189			if (skiptabs) {
1190				if (c == '\t')
1191					continue;
1192				skiptabs = 0;
1193			}
1194			if (c != *eofp)
1195				break;
1196			Xcheck(xs, xp);
1197			Xput(xs, xp, c);
1198			eofp++;
1199		}
1200		/* Allow EOF here so commands with out trailing newlines
1201		 * will work (eg, ksh -c '...', $(...), etc).
1202		 */
1203		if (*eofp == '\0' && (c == 0 || c == '\n')) {
1204			xp = Xrestpos(xs, xp, xpos);
1205			break;
1206		}
1207		ungetsc(c);
1208		while ((c = getsc()) != '\n') {
1209			if (c == 0)
1210				yyerror("here document '%s' unclosed\n", eof);
1211			Xcheck(xs, xp);
1212			Xput(xs, xp, c);
1213		}
1214		Xcheck(xs, xp);
1215		Xput(xs, xp, c);
1216	}
1217	Xput(xs, xp, '\0');
1218	iop->heredoc = Xclose(xs, xp);
1219
1220	if (!(iop->flag & IOEVAL))
1221		ignore_backslash_newline--;
1222}
1223
1224void
1225yyerror(const char *fmt, ...)
1226{
1227	va_list va;
1228
1229	/* pop aliases and re-reads */
1230	while (source->type == SALIAS || source->type == SREREAD)
1231		source = source->next;
1232	source->str = null;	/* zap pending input */
1233
1234	error_prefix(true);
1235	va_start(va, fmt);
1236	shf_vfprintf(shl_out, fmt, va);
1237	va_end(va);
1238	errorfz();
1239}
1240
1241/*
1242 * input for yylex with alias expansion
1243 */
1244
1245Source *
1246pushs(int type, Area *areap)
1247{
1248	Source *s;
1249
1250	s = alloc(sizeof(Source), areap);
1251	memset(s, 0, sizeof(Source));
1252	s->type = type;
1253	s->str = null;
1254	s->areap = areap;
1255	if (type == SFILE || type == SSTDIN)
1256		XinitN(s->xs, 256, s->areap);
1257	return (s);
1258}
1259
1260static int
1261getsc__(void)
1262{
1263	Source *s = source;
1264	int c;
1265
1266 getsc_again:
1267	while ((c = *s->str++) == 0) {
1268		s->str = NULL;		/* return 0 for EOF by default */
1269		switch (s->type) {
1270		case SEOF:
1271			s->str = null;
1272			return (0);
1273
1274		case SSTDIN:
1275		case SFILE:
1276			getsc_line(s);
1277			break;
1278
1279		case SWSTR:
1280			break;
1281
1282		case SSTRING:
1283			break;
1284
1285		case SWORDS:
1286			s->start = s->str = *s->u.strv++;
1287			s->type = SWORDSEP;
1288			break;
1289
1290		case SWORDSEP:
1291			if (*s->u.strv == NULL) {
1292				s->start = s->str = "\n";
1293				s->type = SEOF;
1294			} else {
1295				s->start = s->str = " ";
1296				s->type = SWORDS;
1297			}
1298			break;
1299
1300		case SALIAS:
1301			if (s->flags & SF_ALIASEND) {
1302				/* pass on an unused SF_ALIAS flag */
1303				source = s->next;
1304				source->flags |= s->flags & SF_ALIAS;
1305				s = source;
1306			} else if (*s->u.tblp->val.s &&
1307			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1308				source = s = s->next;	/* pop source stack */
1309				/* Note that this alias ended with a space,
1310				 * enabling alias expansion on the following
1311				 * word.
1312				 */
1313				s->flags |= SF_ALIAS;
1314			} else {
1315				/* At this point, we need to keep the current
1316				 * alias in the source list so recursive
1317				 * aliases can be detected and we also need
1318				 * to return the next character. Do this
1319				 * by temporarily popping the alias to get
1320				 * the next character and then put it back
1321				 * in the source list with the SF_ALIASEND
1322				 * flag set.
1323				 */
1324				source = s->next;	/* pop source stack */
1325				source->flags |= s->flags & SF_ALIAS;
1326				c = getsc__();
1327				if (c) {
1328					s->flags |= SF_ALIASEND;
1329					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1330					s->start = s->str = s->ugbuf;
1331					s->next = source;
1332					source = s;
1333				} else {
1334					s = source;
1335					/* avoid reading eof twice */
1336					s->str = NULL;
1337					break;
1338				}
1339			}
1340			continue;
1341
1342		case SREREAD:
1343			if (s->start != s->ugbuf)	/* yuck */
1344				afree(s->u.freeme, ATEMP);
1345			source = s = s->next;
1346			continue;
1347		}
1348		if (s->str == NULL) {
1349			s->type = SEOF;
1350			s->start = s->str = null;
1351			return ('\0');
1352		}
1353		if (s->flags & SF_ECHO) {
1354			shf_puts(s->str, shl_out);
1355			shf_flush(shl_out);
1356		}
1357	}
1358	/* check for UTF-8 byte order mark */
1359	if (s->flags & SF_FIRST) {
1360		s->flags &= ~SF_FIRST;
1361		if (((unsigned char)c == 0xEF) &&
1362		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
1363		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
1364			s->str += 2;
1365			UTFMODE = 1;
1366			goto getsc_again;
1367		}
1368	}
1369	return (c);
1370}
1371
1372static void
1373getsc_line(Source *s)
1374{
1375	char *xp = Xstring(s->xs, xp), *cp;
1376	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1377	int have_tty = interactive && (s->flags & SF_TTY);
1378
1379	/* Done here to ensure nothing odd happens when a timeout occurs */
1380	XcheckN(s->xs, xp, LINE);
1381	*xp = '\0';
1382	s->start = s->str = xp;
1383
1384	if (have_tty && ksh_tmout) {
1385		ksh_tmout_state = TMOUT_READING;
1386		alarm(ksh_tmout);
1387	}
1388	if (interactive)
1389		change_winsz();
1390	if (have_tty && (
1391#if !MKSH_S_NOVI
1392	    Flag(FVI) ||
1393#endif
1394	    Flag(FEMACS) || Flag(FGMACS))) {
1395		int nread;
1396
1397		nread = x_read(xp, LINE);
1398		if (nread < 0)	/* read error */
1399			nread = 0;
1400		xp[nread] = '\0';
1401		xp += nread;
1402	} else {
1403		if (interactive)
1404			pprompt(prompt, 0);
1405		else
1406			s->line++;
1407
1408		while (1) {
1409			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1410
1411			if (!p && shf_error(s->u.shf) &&
1412			    shf_errno(s->u.shf) == EINTR) {
1413				shf_clearerr(s->u.shf);
1414				if (trap)
1415					runtraps(0);
1416				continue;
1417			}
1418			if (!p || (xp = p, xp[-1] == '\n'))
1419				break;
1420			/* double buffer size */
1421			xp++;	/* move past NUL so doubling works... */
1422			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1423			xp--;	/* ...and move back again */
1424		}
1425		/* flush any unwanted input so other programs/builtins
1426		 * can read it. Not very optimal, but less error prone
1427		 * than flushing else where, dealing with redirections,
1428		 * etc.
1429		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1430		 */
1431		if (s->type == SSTDIN)
1432			shf_flush(s->u.shf);
1433	}
1434	/* XXX: temporary kludge to restore source after a
1435	 * trap may have been executed.
1436	 */
1437	source = s;
1438	if (have_tty && ksh_tmout) {
1439		ksh_tmout_state = TMOUT_EXECUTING;
1440		alarm(0);
1441	}
1442	cp = Xstring(s->xs, xp);
1443#ifndef MKSH_SMALL
1444	if (interactive && *cp == '!' && cur_prompt == PS1) {
1445		int linelen;
1446
1447		linelen = Xlength(s->xs, xp);
1448		XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
1449		/* reload after potential realloc */
1450		cp = Xstring(s->xs, xp);
1451		/* change initial '!' into space */
1452		*cp = ' ';
1453		/* NUL terminate the current string */
1454		*xp = '\0';
1455		/* move the actual string forward */
1456		memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
1457		xp += fc_e_n;
1458		/* prepend it with "fc -e -" */
1459		memcpy(cp, fc_e_, fc_e_n);
1460	}
1461#endif
1462	s->start = s->str = cp;
1463	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1464	/* Note: if input is all nulls, this is not eof */
1465	if (Xlength(s->xs, xp) == 0) {
1466		/* EOF */
1467		if (s->type == SFILE)
1468			shf_fdclose(s->u.shf);
1469		s->str = NULL;
1470	} else if (interactive && *s->str &&
1471	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1472		histsave(&s->line, s->str, true, true);
1473#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1474	} else if (interactive && cur_prompt == PS1) {
1475		cp = Xstring(s->xs, xp);
1476		while (*cp && ctype(*cp, C_IFSWS))
1477			++cp;
1478		if (!*cp)
1479			histsync();
1480#endif
1481	}
1482	if (interactive)
1483		set_prompt(PS2, NULL);
1484}
1485
1486void
1487set_prompt(int to, Source *s)
1488{
1489	cur_prompt = to;
1490
1491	switch (to) {
1492	case PS1:	/* command */
1493		/* Substitute ! and !! here, before substitutions are done
1494		 * so ! in expanded variables are not expanded.
1495		 * NOTE: this is not what AT&T ksh does (it does it after
1496		 * substitutions, POSIX doesn't say which is to be done.
1497		 */
1498		{
1499			struct shf *shf;
1500			char * volatile ps1;
1501			Area *saved_atemp;
1502
1503			ps1 = str_val(global("PS1"));
1504			shf = shf_sopen(NULL, strlen(ps1) * 2,
1505			    SHF_WR | SHF_DYNAMIC, NULL);
1506			while (*ps1)
1507				if (*ps1 != '!' || *++ps1 == '!')
1508					shf_putchar(*ps1++, shf);
1509				else
1510					shf_fprintf(shf, "%d",
1511						s ? s->line + 1 : 0);
1512			ps1 = shf_sclose(shf);
1513			saved_atemp = ATEMP;
1514			newenv(E_ERRH);
1515			if (sigsetjmp(e->jbuf, 0)) {
1516				prompt = safe_prompt;
1517				/* Don't print an error - assume it has already
1518				 * been printed. Reason is we may have forked
1519				 * to run a command and the child may be
1520				 * unwinding its stack through this code as it
1521				 * exits.
1522				 */
1523			} else {
1524				char *cp = substitute(ps1, 0);
1525				strdupx(prompt, cp, saved_atemp);
1526			}
1527			quitenv(NULL);
1528		}
1529		break;
1530	case PS2:	/* command continuation */
1531		prompt = str_val(global("PS2"));
1532		break;
1533	}
1534}
1535
1536static int
1537dopprompt(const char *cp, int ntruncate, bool doprint)
1538{
1539	int columns = 0, lines = 0, indelimit = 0;
1540	char delimiter = 0;
1541
1542	/* Undocumented AT&T ksh feature:
1543	 * If the second char in the prompt string is \r then the first char
1544	 * is taken to be a non-printing delimiter and any chars between two
1545	 * instances of the delimiter are not considered to be part of the
1546	 * prompt length
1547	 */
1548	if (*cp && cp[1] == '\r') {
1549		delimiter = *cp;
1550		cp += 2;
1551	}
1552	for (; *cp; cp++) {
1553		if (indelimit && *cp != delimiter)
1554			;
1555		else if (*cp == '\n' || *cp == '\r') {
1556			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1557			columns = 0;
1558		} else if (*cp == '\t') {
1559			columns = (columns | 7) + 1;
1560		} else if (*cp == '\b') {
1561			if (columns > 0)
1562				columns--;
1563		} else if (*cp == delimiter)
1564			indelimit = !indelimit;
1565		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1566			const char *cp2;
1567			columns += utf_widthadj(cp, &cp2);
1568			if (doprint && (indelimit ||
1569			    (ntruncate < (x_cols * lines + columns))))
1570				shf_write(cp, cp2 - cp, shl_out);
1571			cp = cp2 - /* loop increment */ 1;
1572			continue;
1573		} else
1574			columns++;
1575		if (doprint && (*cp != delimiter) &&
1576		    (indelimit || (ntruncate < (x_cols * lines + columns))))
1577			shf_putc(*cp, shl_out);
1578	}
1579	if (doprint)
1580		shf_flush(shl_out);
1581	return (x_cols * lines + columns);
1582}
1583
1584
/*
 * Print the prompt string cp via dopprompt() with printing enabled,
 * passing ntruncate through; the computed length is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1590
/*
 * Return the position value dopprompt() computes for the prompt string
 * cp, without printing anything.
 */
int
promptlen(const char *cp)
{
	int len = dopprompt(cp, 0, false);

	return (len);
}
1596
1597/* Read the variable part of a ${...} expression (ie, up to but not including
1598 * the :[-+?=#%] or close-brace.
1599 */
1600static char *
1601get_brace_var(XString *wsp, char *wp)
1602{
1603	enum parse_state {
1604		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1605		PS_NUMBER, PS_VAR1
1606	} state;
1607	char c;
1608
1609	state = PS_INITIAL;
1610	while (1) {
1611		c = getsc();
1612		/* State machine to figure out where the variable part ends. */
1613		switch (state) {
1614		case PS_INITIAL:
1615			if (c == '#' || c == '!' || c == '%') {
1616				state = PS_SAW_HASH;
1617				break;
1618			}
1619			/* FALLTHROUGH */
1620		case PS_SAW_HASH:
1621			if (ksh_isalphx(c))
1622				state = PS_IDENT;
1623			else if (ksh_isdigit(c))
1624				state = PS_NUMBER;
1625			else if (ctype(c, C_VAR1))
1626				state = PS_VAR1;
1627			else
1628				goto out;
1629			break;
1630		case PS_IDENT:
1631			if (!ksh_isalnux(c)) {
1632				if (c == '[') {
1633					char *tmp, *p;
1634
1635					if (!arraysub(&tmp))
1636						yyerror("missing ]\n");
1637					*wp++ = c;
1638					for (p = tmp; *p; ) {
1639						Xcheck(*wsp, wp);
1640						*wp++ = *p++;
1641					}
1642					afree(tmp, ATEMP);
1643					c = getsc();	/* the ] */
1644				}
1645				goto out;
1646			}
1647			break;
1648		case PS_NUMBER:
1649			if (!ksh_isdigit(c))
1650				goto out;
1651			break;
1652		case PS_VAR1:
1653			goto out;
1654		}
1655		Xcheck(*wsp, wp);
1656		*wp++ = c;
1657	}
1658 out:
1659	*wp++ = '\0';	/* end of variable part */
1660	ungetsc(c);
1661	return (wp);
1662}
1663
1664/*
1665 * Save an array subscript - returns true if matching bracket found, false
1666 * if eof or newline was found.
1667 * (Returned string double null terminated)
1668 */
1669static int
1670arraysub(char **strp)
1671{
1672	XString ws;
1673	char	*wp;
1674	char	c;
1675	int	depth = 1;	/* we are just past the initial [ */
1676
1677	Xinit(ws, wp, 32, ATEMP);
1678
1679	do {
1680		c = getsc();
1681		Xcheck(ws, wp);
1682		*wp++ = c;
1683		if (c == '[')
1684			depth++;
1685		else if (c == ']')
1686			depth--;
1687	} while (depth > 0 && c && c != '\n');
1688
1689	*wp++ = '\0';
1690	*strp = Xclose(ws, wp);
1691
1692	return (depth == 0 ? 1 : 0);
1693}
1694
1695/* Unget a char: handles case when we are already at the start of the buffer */
1696static const char *
1697ungetsc(int c)
1698{
1699	if (backslash_skip)
1700		backslash_skip--;
1701	/* Don't unget eof... */
1702	if (source->str == null && c == '\0')
1703		return (source->str);
1704	if (source->str > source->start)
1705		source->str--;
1706	else {
1707		Source *s;
1708
1709		s = pushs(SREREAD, source->areap);
1710		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1711		s->start = s->str = s->ugbuf;
1712		s->next = source;
1713		source = s;
1714	}
1715	return (source->str);
1716}
1717
1718
1719/* Called to get a char that isn't a \newline sequence. */
1720static int
1721getsc_bn(void)
1722{
1723	int c, c2;
1724
1725	if (ignore_backslash_newline)
1726		return (getsc_());
1727
1728	if (backslash_skip == 1) {
1729		backslash_skip = 2;
1730		return (getsc_());
1731	}
1732
1733	backslash_skip = 0;
1734
1735	while (1) {
1736		c = getsc_();
1737		if (c == '\\') {
1738			if ((c2 = getsc_()) == '\n')
1739				/* ignore the \newline; get the next char... */
1740				continue;
1741			ungetsc(c2);
1742			backslash_skip = 1;
1743		}
1744		return (c);
1745	}
1746}
1747
1748static Lex_state *
1749push_state_(State_info *si, Lex_state *old_end)
1750{
1751	Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);
1752
1753	news[0].ls_info.base = old_end;
1754	si->base = &news[0];
1755	si->end = &news[STATE_BSIZE];
1756	return (&news[1]);
1757}
1758
1759static Lex_state *
1760pop_state_(State_info *si, Lex_state *old_end)
1761{
1762	Lex_state *old_base = si->base;
1763
1764	si->base = old_end->ls_info.base - STATE_BSIZE;
1765	si->end = old_end->ls_info.base;
1766
1767	afree(old_base, ATEMP);
1768
1769	return (si->base + STATE_BSIZE - 1);
1770}
1771
/* Function wrapper around getsc(): return the next input character. */
static int
s_get(void)
{
	int ch = getsc();

	return (ch);
}
1777
/* Function wrapper around ungetsc(): push c back, ignoring the result. */
static void
s_put(int c)
{
	(void)ungetsc(c);
}
1783