/*	$OpenBSD: eval.c,v 1.40 2013/09/14 20:09:30 millert Exp $	*/

/*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 *		 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
 *	mirabilos <m@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */
23
24#include "sh.h"
25
26__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.219 2018/01/14 01:29:47 tg Exp $");
27
/*
 * string expansion
 *
 * first pass: quoting, IFS separation, ~, ${}, $() and $(()) substitution.
 * second pass: alternation ({,}), filename expansion (*?[]).
 */
34
/*
 * expansion generator state
 *
 * Describes where expand() takes its next characters from while a
 * substitution is being processed. Member order matters: expand()
 * initialises this structure positionally.
 */
typedef struct {
	/* not including an "int type;" member, see expand() */
	/* string currently being read */
	const char *str;
	/* source */
	union {
		/* string[]: remaining words of an argument vector */
		const char **strv;
		/* file: stream read for command substitution */
		struct shf *shf;
	} u;
	/* variable in ${var...} */
	struct tbl *var;
	/* split "$@" / call waitlast in $() */
	bool split;
} Expand;
52
/* Generator states of expand(): where the next character comes from */
#define	XBASE		0	/* scanning original */
#define	XSUB		1	/* expanding ${} string */
#define	XARGSEP		2	/* ifs0 between "$*" */
#define	XARG		3	/* expanding $*, $@ */
#define	XCOM		4	/* expanding $() */
#define XNULLSUB	5	/* "$@" when $# is 0 (don't generate word) */
#define XSUBMID		6	/* middle of expanding ${} */

/* States used for field splitting */
#define IFS_WORD	0	/* word has chars (or quotes except "$@") */
#define IFS_WS		1	/* have seen IFS white-space */
#define IFS_NWS		2	/* have seen IFS non-white-space */
#define IFS_IWS		3	/* beginning of word, ignore IFS WS */
#define IFS_QUOTE	4	/* beg.w/quote, become IFS_WORD unless "$@" */

/*
 * Encoding of the ${var<op>word} substitution type computed by varsub():
 * the low byte holds the operator character, the bits above it are flags.
 */
#define STYPE_CHAR	0xFF	/* mask: operator character */
#define STYPE_DBL	0x100	/* flag: ':' prefix or doubled operator */
#define STYPE_AT	0x200	/* flag: ${var@x} form */
#define STYPE_SINGLE	0x2FF	/* mask: operator character plus STYPE_AT */
#define STYPE_MASK	0x300	/* mask: both flags (STYPE_DBL | STYPE_AT) */
73
74static int varsub(Expand *, const char *, const char *, int *, int *);
75static int comsub(Expand *, const char *, int);
76static char *valsub(struct op *, Area *);
77static char *trimsub(char *, char *, int);
78static void glob(char *, XPtrV *, bool);
79static void globit(XString *, char **, char *, XPtrV *, int);
80static const char *maybe_expand_tilde(const char *, XString *, char **, bool);
81#ifndef MKSH_NOPWNAM
82static char *homedir(char *);
83#endif
84static void alt_expand(XPtrV *, char *, char *, char *, int);
85static int utflen(const char *) MKSH_A_PURE;
86static void utfincptr(const char *, mksh_ari_t *);
87
88/* UTFMODE functions */
89static int
90utflen(const char *s)
91{
92	size_t n;
93
94	if (UTFMODE) {
95		n = 0;
96		while (*s) {
97			s += utf_ptradj(s);
98			++n;
99		}
100	} else
101		n = strlen(s);
102
103	if (n > 2147483647)
104		n = 2147483647;
105	return ((int)n);
106}
107
108static void
109utfincptr(const char *s, mksh_ari_t *lp)
110{
111	const char *cp = s;
112
113	while ((*lp)--)
114		cp += utf_ptradj(cp);
115	*lp = cp - s;
116}
117
118/* compile and expand word */
119char *
120substitute(const char *cp, int f)
121{
122	struct source *s, *sold;
123
124	sold = source;
125	s = pushs(SWSTR, ATEMP);
126	s->start = s->str = cp;
127	source = s;
128	if (yylex(ONEWORD) != LWORD)
129		internal_errorf(Tbadsubst);
130	source = sold;
131	afree(s, ATEMP);
132	return (evalstr(yylval.cp, f));
133}
134
135/*
136 * expand arg-list
137 */
138char **
139eval(const char **ap, int f)
140{
141	XPtrV w;
142
143	if (*ap == NULL) {
144		union mksh_ccphack vap;
145
146		vap.ro = ap;
147		return (vap.rw);
148	}
149	XPinit(w, 32);
150	/* space for shell name */
151	XPput(w, NULL);
152	while (*ap != NULL)
153		expand(*ap++, &w, f);
154	XPput(w, NULL);
155	return ((char **)XPclose(w) + 1);
156}
157
158/*
159 * expand string
160 */
161char *
162evalstr(const char *cp, int f)
163{
164	XPtrV w;
165	char *dp = null;
166
167	XPinit(w, 1);
168	expand(cp, &w, f);
169	if (XPsize(w))
170		dp = *XPptrv(w);
171	XPfree(w);
172	return (dp);
173}
174
175/*
176 * expand string - return only one component
177 * used from iosetup to expand redirection files
178 */
179char *
180evalonestr(const char *cp, int f)
181{
182	XPtrV w;
183	char *rv;
184
185	XPinit(w, 1);
186	expand(cp, &w, f);
187	switch (XPsize(w)) {
188	case 0:
189		rv = null;
190		break;
191	case 1:
192		rv = (char *) *XPptrv(w);
193		break;
194	default:
195		rv = evalstr(cp, f & ~DOGLOB);
196		break;
197	}
198	XPfree(w);
199	return (rv);
200}
201
/*
 * for nested substitution: ${var:=$var2}
 *
 * expand() keeps one entry per ${...} that is currently open, linked
 * both ways so that entries allocated once can be reused.
 */
typedef struct SubType {
	struct tbl *var;	/* variable for ${var..} */
	struct SubType *prev;	/* old type */
	struct SubType *next;	/* popped type (to avoid re-allocating) */
	size_t	base;		/* start position of expanded word */
	short	stype;		/* [=+-?%#] action after expanded word */
	short	f;		/* saved value of f (DOPAT, etc) */
	uint8_t	quotep;		/* saved value of quote (for ${..[%#]..}) */
	uint8_t	quotew;		/* saved value of quote (for ${..[+-=]..}) */
} SubType;
213
214void
215expand(
216    /* input word */
217    const char *ccp,
218    /* output words */
219    XPtrV *wp,
220    /* DO* flags */
221    int f)
222{
223	int c = 0;
224	/* expansion type */
225	int type;
226	/* quoted */
227	int quote = 0;
228	/* destination string and live pointer */
229	XString ds;
230	char *dp;
231	/* source */
232	const char *sp;
233	/* second pass flags */
234	int fdo;
235	/* have word */
236	int word;
237	/* field splitting of parameter/command substitution */
238	int doblank;
239	/* expansion variables */
240	Expand x = {
241		NULL, { NULL }, NULL, 0
242	};
243	SubType st_head, *st;
244	/* record number of trailing newlines in COMSUB */
245	int newlines = 0;
246	bool saw_eq, make_magic;
247	unsigned int tilde_ok;
248	size_t len;
249	char *cp;
250
251	if (ccp == NULL)
252		internal_errorf("expand(NULL)");
253	/* for alias, readonly, set, typeset commands */
254	if ((f & DOVACHECK) && is_wdvarassign(ccp)) {
255		f &= ~(DOVACHECK | DOBLANK | DOGLOB | DOTILDE);
256		f |= DOASNTILDE | DOSCALAR;
257	}
258	if (Flag(FNOGLOB))
259		f &= ~DOGLOB;
260	if (Flag(FMARKDIRS))
261		f |= DOMARKDIRS;
262	if (Flag(FBRACEEXPAND) && (f & DOGLOB))
263		f |= DOBRACE;
264
265	/* init destination string */
266	Xinit(ds, dp, 128, ATEMP);
267	type = XBASE;
268	sp = ccp;
269	fdo = 0;
270	saw_eq = false;
271	/* must be 1/0 */
272	tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
273	doblank = 0;
274	make_magic = false;
275	word = (f&DOBLANK) ? IFS_WS : IFS_WORD;
276	/* clang doesn't know OSUBST comes before CSUBST */
277	memset(&st_head, 0, sizeof(st_head));
278	st = &st_head;
279
280	while (/* CONSTCOND */ 1) {
281		Xcheck(ds, dp);
282
283		switch (type) {
284		case XBASE:
285			/* original prefixed string */
286			c = ord(*sp++);
287			switch (c) {
288			case EOS:
289				c = 0;
290				break;
291			case CHAR:
292				c = ord(*sp++);
293				break;
294			case QCHAR:
295				/* temporary quote */
296				quote |= 2;
297				c = ord(*sp++);
298				break;
299			case OQUOTE:
300				if (word != IFS_WORD)
301					word = IFS_QUOTE;
302				tilde_ok = 0;
303				quote = 1;
304				continue;
305			case CQUOTE:
306				if (word == IFS_QUOTE)
307					word = IFS_WORD;
308				quote = st->quotew;
309				continue;
310			case COMASUB:
311			case COMSUB:
312			case FUNASUB:
313			case FUNSUB:
314			case VALSUB:
315				tilde_ok = 0;
316				if (f & DONTRUNCOMMAND) {
317					word = IFS_WORD;
318					*dp++ = '$';
319					switch (c) {
320					case COMASUB:
321					case COMSUB:
322						*dp++ = '(';
323						c = ORD(')');
324						break;
325					case FUNASUB:
326					case FUNSUB:
327					case VALSUB:
328						*dp++ = '{';
329						*dp++ = c == VALSUB ? '|' : ' ';
330						c = ORD('}');
331						break;
332					}
333					while (*sp != '\0') {
334						Xcheck(ds, dp);
335						*dp++ = *sp++;
336					}
337					if ((unsigned int)c == ORD('}'))
338						*dp++ = ';';
339					*dp++ = c;
340				} else {
341					type = comsub(&x, sp, c);
342					if (type != XBASE && (f & DOBLANK))
343						doblank++;
344					sp = strnul(sp) + 1;
345					newlines = 0;
346				}
347				continue;
348			case EXPRSUB:
349				tilde_ok = 0;
350				if (f & DONTRUNCOMMAND) {
351					word = IFS_WORD;
352					*dp++ = '$'; *dp++ = '('; *dp++ = '(';
353					while (*sp != '\0') {
354						Xcheck(ds, dp);
355						*dp++ = *sp++;
356					}
357					*dp++ = ')'; *dp++ = ')';
358				} else {
359					struct tbl v;
360
361					v.flag = DEFINED|ISSET|INTEGER;
362					/* not default */
363					v.type = 10;
364					v.name[0] = '\0';
365					v_evaluate(&v, substitute(sp, 0),
366					    KSH_UNWIND_ERROR, true);
367					sp = strnul(sp) + 1;
368					x.str = str_val(&v);
369					type = XSUB;
370					if (f & DOBLANK)
371						doblank++;
372				}
373				continue;
374			case OSUBST: {
375				/* ${{#}var{:}[=+-?#%]word} */
376			/*-
377			 * format is:
378			 *	OSUBST [{x] plain-variable-part \0
379			 *	    compiled-word-part CSUBST [}x]
380			 * This is where all syntax checking gets done...
381			 */
382				/* skip the { or x (}) */
383				const char *varname = ++sp;
384				int stype;
385				int slen = 0;
386
387				/* skip variable */
388				sp = cstrchr(sp, '\0') + 1;
389				type = varsub(&x, varname, sp, &stype, &slen);
390				if (type < 0) {
391					char *beg, *end, *str;
392 unwind_substsyn:
393					/* restore sp */
394					sp = varname - 2;
395					beg = wdcopy(sp, ATEMP);
396					end = (wdscan(cstrchr(sp, '\0') + 1,
397					    CSUBST) - sp) + beg;
398					/* ({) the } or x is already skipped */
399					if (end < wdscan(beg, EOS))
400						*end = EOS;
401					str = snptreef(NULL, 64, Tf_S, beg);
402					afree(beg, ATEMP);
403					errorf(Tf_sD_s, str, Tbadsubst);
404				}
405				if (f & DOBLANK)
406					doblank++;
407				tilde_ok = 0;
408				if (word == IFS_QUOTE && type != XNULLSUB)
409					word = IFS_WORD;
410				if (type == XBASE) {
411					/* expand? */
412					if (!st->next) {
413						SubType *newst;
414
415						newst = alloc(sizeof(SubType), ATEMP);
416						newst->next = NULL;
417						newst->prev = st;
418						st->next = newst;
419					}
420					st = st->next;
421					st->stype = stype;
422					st->base = Xsavepos(ds, dp);
423					st->f = f;
424					if (x.var == vtemp) {
425						st->var = tempvar(vtemp->name);
426						st->var->flag &= ~INTEGER;
427						/* can't fail here */
428						setstr(st->var,
429						    str_val(x.var),
430						    KSH_RETURN_ERROR | 0x4);
431					} else
432						st->var = x.var;
433
434					st->quotew = st->quotep = quote;
435					/* skip qualifier(s) */
436					if (stype)
437						sp += slen;
438					switch (stype & STYPE_SINGLE) {
439					case ORD('#') | STYPE_AT:
440						x.str = shf_smprintf("%08X",
441						    (unsigned int)hash(str_val(st->var)));
442						break;
443					case ORD('Q') | STYPE_AT: {
444						struct shf shf;
445
446						shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
447						print_value_quoted(&shf, str_val(st->var));
448						x.str = shf_sclose(&shf);
449						break;
450					    }
451					case ORD('0'): {
452						char *beg, *mid, *end, *stg;
453						mksh_ari_t from = 0, num = -1, flen, finc = 0;
454
455						beg = wdcopy(sp, ATEMP);
456						mid = beg + (wdscan(sp, ADELIM) - sp);
457						stg = beg + (wdscan(sp, CSUBST) - sp);
458						mid[-2] = EOS;
459						if (ord(mid[-1]) == ORD(/*{*/ '}')) {
460							sp += mid - beg - 1;
461							end = NULL;
462						} else {
463							end = mid +
464							    (wdscan(mid, ADELIM) - mid);
465							if (ord(end[-1]) != ORD(/*{*/ '}'))
466								/* more than max delimiters */
467								goto unwind_substsyn;
468							end[-2] = EOS;
469							sp += end - beg - 1;
470						}
471						evaluate(substitute(stg = wdstrip(beg, 0), 0),
472						    &from, KSH_UNWIND_ERROR, true);
473						afree(stg, ATEMP);
474						if (end) {
475							evaluate(substitute(stg = wdstrip(mid, 0), 0),
476							    &num, KSH_UNWIND_ERROR, true);
477							afree(stg, ATEMP);
478						}
479						afree(beg, ATEMP);
480						beg = str_val(st->var);
481						flen = utflen(beg);
482						if (from < 0) {
483							if (-from < flen)
484								finc = flen + from;
485						} else
486							finc = from < flen ? from : flen;
487						if (UTFMODE)
488							utfincptr(beg, &finc);
489						beg += finc;
490						flen = utflen(beg);
491						if (num < 0 || num > flen)
492							num = flen;
493						if (UTFMODE)
494							utfincptr(beg, &num);
495						strndupx(x.str, beg, num, ATEMP);
496						goto do_CSUBST;
497					    }
498					case ORD('/') | STYPE_AT:
499					case ORD('/'): {
500						char *s, *p, *d, *sbeg, *end;
501						char *pat = NULL, *rrep = null;
502						char fpat = 0, *tpat1, *tpat2;
503						char *ws, *wpat, *wrep;
504
505						s = ws = wdcopy(sp, ATEMP);
506						p = s + (wdscan(sp, ADELIM) - sp);
507						d = s + (wdscan(sp, CSUBST) - sp);
508						p[-2] = EOS;
509						if (ord(p[-1]) == ORD(/*{*/ '}'))
510							d = NULL;
511						else
512							d[-2] = EOS;
513						sp += (d ? d : p) - s - 1;
514						if (!(stype & STYPE_MASK) &&
515						    s[0] == CHAR &&
516						    ctype(s[1], C_SUB2))
517							fpat = s[1];
518						wpat = s + (fpat ? 2 : 0);
519						wrep = d ? p : NULL;
520						if (!(stype & STYPE_AT)) {
521							rrep = wrep ? evalstr(wrep,
522							    DOTILDE | DOSCALAR) :
523							    null;
524						}
525
526						/* prepare string on which to work */
527						strdupx(s, str_val(st->var), ATEMP);
528						sbeg = s;
529 again_search:
530						pat = evalstr(wpat,
531						    DOTILDE | DOSCALAR | DOPAT);
532						/* check for special cases */
533						if (!*pat && !fpat) {
534							/*
535							 * empty unanchored
536							 * pattern => reject
537							 */
538							goto no_repl;
539						}
540						if ((stype & STYPE_MASK) &&
541						    gmatchx(null, pat, false)) {
542							/*
543							 * pattern matches empty
544							 * string => don't loop
545							 */
546							stype &= ~STYPE_MASK;
547						}
548
549						/* first see if we have any match at all */
550						if (ord(fpat) == ORD('#')) {
551							/* anchor at the beginning */
552							tpat1 = shf_smprintf("%s%c*", pat, MAGIC);
553							tpat2 = tpat1;
554						} else if (ord(fpat) == ORD('%')) {
555							/* anchor at the end */
556							tpat1 = shf_smprintf("%c*%s", MAGIC, pat);
557							tpat2 = pat;
558						} else {
559							/* float */
560							tpat1 = shf_smprintf("%c*%s%c*", MAGIC, pat, MAGIC);
561							tpat2 = tpat1 + 2;
562						}
563 again_repl:
564						/*
565						 * this would not be necessary if gmatchx would return
566						 * the start and end values of a match found, like re*
567						 */
568						if (!gmatchx(sbeg, tpat1, false))
569							goto end_repl;
570						end = strnul(s);
571						/* now anchor the beginning of the match */
572						if (ord(fpat) != ORD('#'))
573							while (sbeg <= end) {
574								if (gmatchx(sbeg, tpat2, false))
575									break;
576								else
577									sbeg++;
578							}
579						/* now anchor the end of the match */
580						p = end;
581						if (ord(fpat) != ORD('%'))
582							while (p >= sbeg) {
583								bool gotmatch;
584
585								c = ord(*p);
586								*p = '\0';
587								gotmatch = tobool(gmatchx(sbeg, pat, false));
588								*p = c;
589								if (gotmatch)
590									break;
591								p--;
592							}
593						strndupx(end, sbeg, p - sbeg, ATEMP);
594						record_match(end);
595						afree(end, ATEMP);
596						if (stype & STYPE_AT) {
597							if (rrep != null)
598								afree(rrep, ATEMP);
599							rrep = wrep ? evalstr(wrep,
600							    DOTILDE | DOSCALAR) :
601							    null;
602						}
603						strndupx(end, s, sbeg - s, ATEMP);
604						d = shf_smprintf(Tf_sss, end, rrep, p);
605						afree(end, ATEMP);
606						sbeg = d + (sbeg - s) + strlen(rrep);
607						afree(s, ATEMP);
608						s = d;
609						if (stype & STYPE_AT) {
610							afree(tpat1, ATEMP);
611							afree(pat, ATEMP);
612							goto again_search;
613						} else if (stype & STYPE_DBL)
614							goto again_repl;
615 end_repl:
616						afree(tpat1, ATEMP);
617						x.str = s;
618 no_repl:
619						afree(pat, ATEMP);
620						if (rrep != null)
621							afree(rrep, ATEMP);
622						afree(ws, ATEMP);
623						goto do_CSUBST;
624					    }
625					case ORD('#'):
626					case ORD('%'):
627						/* ! DOBLANK,DOBRACE */
628						f = (f & DONTRUNCOMMAND) |
629						    DOPAT | DOTILDE |
630						    DOTEMP | DOSCALAR;
631						tilde_ok = 1;
632						st->quotew = quote = 0;
633						/*
634						 * Prepend open pattern (so |
635						 * in a trim will work as
636						 * expected)
637						 */
638						if (!Flag(FSH)) {
639							*dp++ = MAGIC;
640							*dp++ = ORD(0x80 | '@');
641						}
642						break;
643					case ORD('='):
644						/*
645						 * Tilde expansion for string
646						 * variables in POSIX mode is
647						 * governed by Austinbug 351.
648						 * In non-POSIX mode historic
649						 * ksh behaviour (enable it!)
650						 * us followed.
651						 * Not doing tilde expansion
652						 * for integer variables is a
653						 * non-POSIX thing - makes
654						 * sense though, since ~ is
655						 * a arithmetic operator.
656						 */
657						if (!(x.var->flag & INTEGER))
658							f |= DOASNTILDE | DOTILDE;
659						f |= DOTEMP | DOSCALAR;
660						/*
661						 * These will be done after the
662						 * value has been assigned.
663						 */
664						f &= ~(DOBLANK|DOGLOB|DOBRACE);
665						tilde_ok = 1;
666						break;
667					case ORD('?'):
668						if (*sp == CSUBST)
669							errorf("%s: parameter null or not set",
670							    st->var->name);
671						f &= ~DOBLANK;
672						f |= DOTEMP;
673						/* FALLTHROUGH */
674					default:
675						/* '-' '+' '?' */
676						if (quote)
677							word = IFS_WORD;
678						else if (dp == Xstring(ds, dp))
679							word = IFS_IWS;
680						/* Enable tilde expansion */
681						tilde_ok = 1;
682						f |= DOTILDE;
683					}
684				} else
685					/* skip word */
686					sp += wdscan(sp, CSUBST) - sp;
687				continue;
688			    }
689			case CSUBST:
690				/* only get here if expanding word */
691 do_CSUBST:
692				/* ({) skip the } or x */
693				sp++;
694				/* in case of ${unset:-} */
695				tilde_ok = 0;
696				*dp = '\0';
697				quote = st->quotep;
698				f = st->f;
699				if (f & DOBLANK)
700					doblank--;
701				switch (st->stype & STYPE_SINGLE) {
702				case ORD('#'):
703				case ORD('%'):
704					if (!Flag(FSH)) {
705						/* Append end-pattern */
706						*dp++ = MAGIC;
707						*dp++ = ')';
708					}
709					*dp = '\0';
710					dp = Xrestpos(ds, dp, st->base);
711					/*
712					 * Must use st->var since calling
713					 * global would break things
714					 * like x[i+=1].
715					 */
716					x.str = trimsub(str_val(st->var),
717						dp, st->stype);
718					if (x.str[0] != '\0') {
719						word = IFS_IWS;
720						type = XSUB;
721					} else if (quote) {
722						word = IFS_WORD;
723						type = XSUB;
724					} else {
725						if (dp == Xstring(ds, dp))
726							word = IFS_IWS;
727						type = XNULLSUB;
728					}
729					if (f & DOBLANK)
730						doblank++;
731					st = st->prev;
732					continue;
733				case ORD('='):
734					/*
735					 * Restore our position and substitute
736					 * the value of st->var (may not be
737					 * the assigned value in the presence
738					 * of integer/right-adj/etc attributes).
739					 */
740					dp = Xrestpos(ds, dp, st->base);
741					/*
742					 * Must use st->var since calling
743					 * global would cause with things
744					 * like x[i+=1] to be evaluated twice.
745					 */
746					/*
747					 * Note: not exported by FEXPORT
748					 * in AT&T ksh.
749					 */
750					/*
751					 * XXX POSIX says readonly is only
752					 * fatal for special builtins (setstr
753					 * does readonly check).
754					 */
755					len = strlen(dp) + 1;
756					setstr(st->var,
757					    debunk(alloc(len, ATEMP),
758					    dp, len), KSH_UNWIND_ERROR);
759					x.str = str_val(st->var);
760					type = XSUB;
761					if (f & DOBLANK)
762						doblank++;
763					st = st->prev;
764					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
765					continue;
766				case ORD('?'):
767					dp = Xrestpos(ds, dp, st->base);
768
769					errorf(Tf_sD_s, st->var->name,
770					    debunk(dp, dp, strlen(dp) + 1));
771					break;
772				case ORD('0'):
773				case ORD('/') | STYPE_AT:
774				case ORD('/'):
775				case ORD('#') | STYPE_AT:
776				case ORD('Q') | STYPE_AT:
777					dp = Xrestpos(ds, dp, st->base);
778					type = XSUB;
779					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
780					if (f & DOBLANK)
781						doblank++;
782					st = st->prev;
783					continue;
784				/* default: '-' '+' */
785				}
786				st = st->prev;
787				type = XBASE;
788				continue;
789
790			case OPAT:
791				/* open pattern: *(foo|bar) */
792				/* Next char is the type of pattern */
793				make_magic = true;
794				c = ord(*sp++) | 0x80U;
795				break;
796
797			case SPAT:
798				/* pattern separator (|) */
799				make_magic = true;
800				c = ORD('|');
801				break;
802
803			case CPAT:
804				/* close pattern */
805				make_magic = true;
806				c = ORD(/*(*/ ')');
807				break;
808			}
809			break;
810
811		case XNULLSUB:
812			/*
813			 * Special case for "$@" (and "${foo[@]}") - no
814			 * word is generated if $# is 0 (unless there is
815			 * other stuff inside the quotes).
816			 */
817			type = XBASE;
818			if (f & DOBLANK) {
819				doblank--;
820				if (dp == Xstring(ds, dp) && word != IFS_WORD)
821					word = IFS_IWS;
822			}
823			continue;
824
825		case XSUB:
826		case XSUBMID:
827			if ((c = ord(*x.str++)) == 0) {
828				type = XBASE;
829				if (f & DOBLANK)
830					doblank--;
831				continue;
832			}
833			break;
834
835		case XARGSEP:
836			type = XARG;
837			quote = 1;
838			/* FALLTHROUGH */
839		case XARG:
840			if ((c = ord(*x.str++)) == '\0') {
841				/*
842				 * force null words to be created so
843				 * set -- "" 2 ""; echo "$@" will do
844				 * the right thing
845				 */
846				if (quote && x.split)
847					word = IFS_WORD;
848				if ((x.str = *x.u.strv++) == NULL) {
849					type = XBASE;
850					if (f & DOBLANK)
851						doblank--;
852					continue;
853				}
854				c = ord(ifs0);
855				if ((f & DOHEREDOC)) {
856					/* pseudo-field-split reliably */
857					if (c == 0)
858						c = ORD(' ');
859					break;
860				}
861				if ((f & DOSCALAR)) {
862					/* do not field-split */
863					if (x.split) {
864						c = ORD(' ');
865						break;
866					}
867					if (c == 0)
868						continue;
869				}
870				if (c == 0) {
871					if (quote && !x.split)
872						continue;
873					if (!quote && word == IFS_WS)
874						continue;
875					/* this is so we don't terminate */
876					c = ORD(' ');
877					/* now force-emit a word */
878					goto emit_word;
879				}
880				if (quote && x.split) {
881					/* terminate word for "$@" */
882					type = XARGSEP;
883					quote = 0;
884				}
885			}
886			break;
887
888		case XCOM:
889			if (x.u.shf == NULL) {
890				/* $(<...) failed */
891				subst_exstat = 1;
892				/* fake EOF */
893				c = -1;
894			} else if (newlines) {
895				/* spit out saved NLs */
896				c = ORD('\n');
897				--newlines;
898			} else {
899				while ((c = shf_getc(x.u.shf)) == 0 ||
900				    cinttype(c, C_NL)) {
901#ifdef MKSH_WITH_TEXTMODE
902					if (c == ORD('\r')) {
903						c = shf_getc(x.u.shf);
904						switch (c) {
905						case ORD('\n'):
906							break;
907						default:
908							shf_ungetc(c, x.u.shf);
909							/* FALLTHROUGH */
910						case -1:
911							c = ORD('\r');
912							break;
913						}
914					}
915#endif
916					if (c == ORD('\n'))
917						/* save newlines */
918						newlines++;
919				}
920				if (newlines && c != -1) {
921					shf_ungetc(c, x.u.shf);
922					c = ORD('\n');
923					--newlines;
924				}
925			}
926			if (c == -1) {
927				newlines = 0;
928				if (x.u.shf)
929					shf_close(x.u.shf);
930				if (x.split)
931					subst_exstat = waitlast();
932				type = XBASE;
933				if (f & DOBLANK)
934					doblank--;
935				continue;
936			}
937			break;
938		}
939
940		/* check for end of word or IFS separation */
941		if (c == 0 || (!quote && (f & DOBLANK) && doblank &&
942		    !make_magic && ctype(c, C_IFS))) {
943			/*-
944			 * How words are broken up:
945			 *			|	value of c
946			 *	word		|	ws	nws	0
947			 *	-----------------------------------
948			 *	IFS_WORD		w/WS	w/NWS	w
949			 *	IFS_WS			-/WS	-/NWS	-
950			 *	IFS_NWS			-/NWS	w/NWS	-
951			 *	IFS_IWS			-/WS	w/NWS	-
952			 * (w means generate a word)
953			 */
954			if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c &&
955			    (word == IFS_IWS || word == IFS_NWS) &&
956			    !ctype(c, C_IFSWS))) {
957 emit_word:
958				if (f & DOHERESTR)
959					*dp++ = '\n';
960				*dp++ = '\0';
961				cp = Xclose(ds, dp);
962				if (fdo & DOBRACE)
963					/* also does globbing */
964					alt_expand(wp, cp, cp,
965					    cp + Xlength(ds, (dp - 1)),
966					    fdo | (f & DOMARKDIRS));
967				else if (fdo & DOGLOB)
968					glob(cp, wp, tobool(f & DOMARKDIRS));
969				else if ((f & DOPAT) || !(fdo & DOMAGIC))
970					XPput(*wp, cp);
971				else
972					XPput(*wp, debunk(cp, cp,
973					    strlen(cp) + 1));
974				fdo = 0;
975				saw_eq = false;
976				/* must be 1/0 */
977				tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
978				if (c == 0)
979					return;
980				Xinit(ds, dp, 128, ATEMP);
981			} else if (c == 0) {
982				return;
983			} else if (type == XSUB && ctype(c, C_IFS) &&
984			    !ctype(c, C_IFSWS) && Xlength(ds, dp) == 0) {
985				*(cp = alloc(1, ATEMP)) = '\0';
986				XPput(*wp, cp);
987				type = XSUBMID;
988			}
989			if (word != IFS_NWS)
990				word = ctype(c, C_IFSWS) ? IFS_WS : IFS_NWS;
991		} else {
992			if (type == XSUB) {
993				if (word == IFS_NWS &&
994				    Xlength(ds, dp) == 0) {
995					*(cp = alloc(1, ATEMP)) = '\0';
996					XPput(*wp, cp);
997				}
998				type = XSUBMID;
999			}
1000
1001			/* age tilde_ok info - ~ code tests second bit */
1002			tilde_ok <<= 1;
1003			/* mark any special second pass chars */
1004			if (!quote)
1005				switch (ord(c)) {
1006				case ORD('['):
1007				case ORD('!'):
1008				case ORD('-'):
1009				case ORD(']'):
1010					/*
1011					 * For character classes - doesn't hurt
1012					 * to have magic !,-,]s outside of
1013					 * [...] expressions.
1014					 */
1015					if (f & (DOPAT | DOGLOB)) {
1016						fdo |= DOMAGIC;
1017						if ((unsigned int)c == ORD('['))
1018							fdo |= f & DOGLOB;
1019						*dp++ = MAGIC;
1020					}
1021					break;
1022				case ORD('*'):
1023				case ORD('?'):
1024					if (f & (DOPAT | DOGLOB)) {
1025						fdo |= DOMAGIC | (f & DOGLOB);
1026						*dp++ = MAGIC;
1027					}
1028					break;
1029				case ORD('{'):
1030				case ORD('}'):
1031				case ORD(','):
1032					if ((f & DOBRACE) &&
1033					    (ord(c) == ORD('{' /*}*/) ||
1034					    (fdo & DOBRACE))) {
1035						fdo |= DOBRACE|DOMAGIC;
1036						*dp++ = MAGIC;
1037					}
1038					break;
1039				case ORD('='):
1040					/* Note first unquoted = for ~ */
1041					if (!(f & DOTEMP) && (!Flag(FPOSIX) ||
1042					    (f & DOASNTILDE)) && !saw_eq) {
1043						saw_eq = true;
1044						tilde_ok = 1;
1045					}
1046					break;
1047				case ORD(':'):
1048					/* : */
1049					/* Note unquoted : for ~ */
1050					if (!(f & DOTEMP) && (f & DOASNTILDE))
1051						tilde_ok = 1;
1052					break;
1053				case ORD('~'):
1054					/*
1055					 * tilde_ok is reset whenever
1056					 * any of ' " $( $(( ${ } are seen.
1057					 * Note that tilde_ok must be preserved
1058					 * through the sequence ${A=a=}~
1059					 */
1060					if (type == XBASE &&
1061					    (f & (DOTILDE | DOASNTILDE)) &&
1062					    (tilde_ok & 2)) {
1063						const char *tcp;
1064						char *tdp = dp;
1065
1066						tcp = maybe_expand_tilde(sp,
1067						    &ds, &tdp,
1068						    tobool(f & DOASNTILDE));
1069						if (tcp) {
1070							if (dp != tdp)
1071								word = IFS_WORD;
1072							dp = tdp;
1073							sp = tcp;
1074							continue;
1075						}
1076					}
1077					break;
1078				}
1079			else
1080				/* undo temporary */
1081				quote &= ~2;
1082
1083			if (make_magic) {
1084				make_magic = false;
1085				fdo |= DOMAGIC | (f & DOGLOB);
1086				*dp++ = MAGIC;
1087			} else if (ISMAGIC(c)) {
1088				fdo |= DOMAGIC;
1089				*dp++ = MAGIC;
1090			}
1091			/* save output char */
1092			*dp++ = c;
1093			word = IFS_WORD;
1094		}
1095	}
1096}
1097
/*
 * Return true if the NULL-terminated string vector strv contains at
 * least one string that is not empty, false otherwise (including for
 * a vector that holds only the terminating NULL).
 */
static bool
hasnonempty(const char **strv)
{
	const char **sp;

	for (sp = strv; *sp != NULL; ++sp)
		if (**sp != '\0')
			return (true);
	return (false);
}
1108
1109/*
1110 * Prepare to generate the string returned by ${} substitution.
1111 */
1112static int
1113varsub(Expand *xp, const char *sp, const char *word,
1114    int *stypep,	/* becomes qualifier type */
1115    int *slenp)		/* " " len (=, :=, etc.) valid iff *stypep != 0 */
1116{
1117	int c;
1118	int state;	/* next state: XBASE, XARG, XSUB, XNULLSUB */
1119	int stype;	/* substitution type */
1120	int slen = 0;
1121	const char *p;
1122	struct tbl *vp;
1123	bool zero_ok = false;
1124
1125	if ((stype = ord(sp[0])) == '\0')
1126		/* Bad variable name */
1127		return (-1);
1128
1129	xp->var = NULL;
1130
1131	/*-
1132	 * ${#var}, string length (-U: characters, +U: octets) or array size
1133	 * ${%var}, string width (-U: screen columns, +U: octets)
1134	 */
1135	c = ord(sp[1]);
1136	if ((unsigned int)stype == ORD('%') && c == '\0')
1137		return (-1);
1138	if (ctype(stype, C_SUB2) && c != '\0') {
1139		/* Can't have any modifiers for ${#...} or ${%...} */
1140		if (*word != CSUBST)
1141			return (-1);
1142		sp++;
1143		/* Check for size of array */
1144		if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ORD('*') ||
1145		    ord(p[1]) == ORD('@')) && ord(p[2]) == ORD(']')) {
1146			int n = 0;
1147
1148			if ((unsigned int)stype != ORD('#'))
1149				return (-1);
1150			vp = global(arrayname(sp));
1151			if (vp->flag & (ISSET|ARRAY))
1152				zero_ok = true;
1153			for (; vp; vp = vp->u.array)
1154				if (vp->flag & ISSET)
1155					n++;
1156			c = n;
1157		} else if ((unsigned int)c == ORD('*') ||
1158		    (unsigned int)c == ORD('@')) {
1159			if ((unsigned int)stype != ORD('#'))
1160				return (-1);
1161			c = e->loc->argc;
1162		} else {
1163			p = str_val(global(sp));
1164			zero_ok = p != null;
1165			if ((unsigned int)stype == ORD('#'))
1166				c = utflen(p);
1167			else {
1168				/* partial utf_mbswidth reimplementation */
1169				const char *s = p;
1170				unsigned int wc;
1171				size_t len;
1172				int cw;
1173
1174				c = 0;
1175				while (*s) {
1176					if (!UTFMODE || (len = utf_mbtowc(&wc,
1177					    s)) == (size_t)-1)
1178						/* not UTFMODE or not UTF-8 */
1179						wc = rtt2asc(*s++);
1180					else
1181						/* UTFMODE and UTF-8 */
1182						s += len;
1183					/* wc == char or wchar at s++ */
1184					if ((cw = utf_wcwidth(wc)) == -1) {
1185						/* 646, 8859-1, 10646 C0/C1 */
1186						c = -1;
1187						break;
1188					}
1189					c += cw;
1190				}
1191			}
1192		}
1193		if (Flag(FNOUNSET) && c == 0 && !zero_ok)
1194			errorf(Tf_parm, sp);
1195		/* unqualified variable/string substitution */
1196		*stypep = 0;
1197		xp->str = shf_smprintf(Tf_d, c);
1198		return (XSUB);
1199	}
1200	if ((unsigned int)stype == ORD('!') && c != '\0' && *word == CSUBST) {
1201		sp++;
1202		if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ORD('*') ||
1203		    ord(p[1]) == ORD('@')) && ord(p[2]) == ORD(']')) {
1204			c = ORD('!');
1205			stype = 0;
1206			goto arraynames;
1207		}
1208		xp->var = global(sp);
1209		xp->str = p ? shf_smprintf("%s[%lu]",
1210		    xp->var->name, arrayindex(xp->var)) : xp->var->name;
1211		*stypep = 0;
1212		return (XSUB);
1213	}
1214
1215	/* Check for qualifiers in word part */
1216	stype = 0;
1217	c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
1218	if ((unsigned int)c == ORD(':')) {
1219		slen += 2;
1220		stype = STYPE_DBL;
1221		c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
1222	}
1223	if (!stype && (unsigned int)c == ORD('/')) {
1224		slen += 2;
1225		stype = c;
1226		if (word[slen] == ADELIM &&
1227		    ord(word[slen + 1]) == (unsigned int)c) {
1228			slen += 2;
1229			stype |= STYPE_DBL;
1230		}
1231	} else if (stype == STYPE_DBL && ((unsigned int)c == ORD(' ') ||
1232	    (unsigned int)c == ORD('0'))) {
1233		stype |= ORD('0');
1234	} else if (ctype(c, C_SUB1)) {
1235		slen += 2;
1236		stype |= c;
1237	} else if (ctype(c, C_SUB2)) {
1238		/* Note: ksh88 allows :%, :%%, etc */
1239		slen += 2;
1240		stype = c;
1241		if (word[slen + 0] == CHAR &&
1242		    ord(word[slen + 1]) == (unsigned int)c) {
1243			stype |= STYPE_DBL;
1244			slen += 2;
1245		}
1246	} else if ((unsigned int)c == ORD('@')) {
1247		/* @x where x is command char */
1248		switch (c = ord(word[slen + 2]) == CHAR ?
1249		    ord(word[slen + 3]) : 0) {
1250		case ORD('#'):
1251		case ORD('/'):
1252		case ORD('Q'):
1253			break;
1254		default:
1255			return (-1);
1256		}
1257		stype |= STYPE_AT | c;
1258		slen += 4;
1259	} else if (stype)
1260		/* : is not ok */
1261		return (-1);
1262	if (!stype && *word != CSUBST)
1263		return (-1);
1264
1265	c = ord(sp[0]);
1266	if ((unsigned int)c == ORD('*') || (unsigned int)c == ORD('@')) {
1267		switch (stype & STYPE_SINGLE) {
1268		/* can't assign to a vector */
1269		case ORD('='):
1270		/* can't trim a vector (yet) */
1271		case ORD('%'):
1272		case ORD('#'):
1273		case ORD('?'):
1274		case ORD('0'):
1275		case ORD('/') | STYPE_AT:
1276		case ORD('/'):
1277		case ORD('#') | STYPE_AT:
1278		case ORD('Q') | STYPE_AT:
1279			return (-1);
1280		}
1281		if (e->loc->argc == 0) {
1282			xp->str = null;
1283			xp->var = global(sp);
1284			state = (unsigned int)c == ORD('@') ? XNULLSUB : XSUB;
1285		} else {
1286			xp->u.strv = (const char **)e->loc->argv + 1;
1287			xp->str = *xp->u.strv++;
1288			/* $@ */
1289			xp->split = tobool((unsigned int)c == ORD('@'));
1290			state = XARG;
1291		}
1292		/* POSIX 2009? */
1293		zero_ok = true;
1294	} else if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ORD('*') ||
1295	    ord(p[1]) == ORD('@')) && ord(p[2]) == ORD(']')) {
1296		XPtrV wv;
1297
1298		switch (stype & STYPE_SINGLE) {
1299		/* can't assign to a vector */
1300		case ORD('='):
1301		/* can't trim a vector (yet) */
1302		case ORD('%'):
1303		case ORD('#'):
1304		case ORD('?'):
1305		case ORD('0'):
1306		case ORD('/') | STYPE_AT:
1307		case ORD('/'):
1308		case ORD('#') | STYPE_AT:
1309		case ORD('Q') | STYPE_AT:
1310			return (-1);
1311		}
1312		c = 0;
1313 arraynames:
1314		XPinit(wv, 32);
1315		vp = global(arrayname(sp));
1316		for (; vp; vp = vp->u.array) {
1317			if (!(vp->flag&ISSET))
1318				continue;
1319			XPput(wv, (unsigned int)c == ORD('!') ?
1320			    shf_smprintf(Tf_lu, arrayindex(vp)) :
1321			    str_val(vp));
1322		}
1323		if (XPsize(wv) == 0) {
1324			xp->str = null;
1325			state = ord(p[1]) == ORD('@') ? XNULLSUB : XSUB;
1326			XPfree(wv);
1327		} else {
1328			XPput(wv, 0);
1329			xp->u.strv = (const char **)XPptrv(wv);
1330			xp->str = *xp->u.strv++;
1331			/* ${foo[@]} */
1332			xp->split = tobool(ord(p[1]) == ORD('@'));
1333			state = XARG;
1334		}
1335	} else {
1336		xp->var = global(sp);
1337		xp->str = str_val(xp->var);
1338		/* can't assign things like $! or $1 */
1339		if ((unsigned int)(stype & STYPE_SINGLE) == ORD('=') &&
1340		    !*xp->str && ctype(*sp, C_VAR1 | C_DIGIT))
1341			return (-1);
1342		state = XSUB;
1343	}
1344
1345	c = stype & STYPE_CHAR;
1346	/* test the compiler's code generator */
1347	if ((!(stype & STYPE_AT) && (ctype(c, C_SUB2) ||
1348	    (((stype & STYPE_DBL) ? *xp->str == '\0' : xp->str == null) &&
1349	    (state != XARG || (ifs0 || xp->split ?
1350	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
1351	    ctype(c, C_EQUAL | C_MINUS | C_QUEST) : (unsigned int)c == ORD('+')))) ||
1352	    (unsigned int)stype == (ORD('0') | STYPE_DBL) ||
1353	    (unsigned int)stype == (ORD('#') | STYPE_AT) ||
1354	    (unsigned int)stype == (ORD('Q') | STYPE_AT) ||
1355	    (unsigned int)(stype & STYPE_CHAR) == ORD('/'))
1356		/* expand word instead of variable value */
1357		state = XBASE;
1358	if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
1359	    (ctype(c, C_SUB2) || (state != XBASE && (unsigned int)c != ORD('+'))))
1360		errorf(Tf_parm, sp);
1361	*stypep = stype;
1362	*slenp = slen;
1363	return (state);
1364}
1365
1366/*
1367 * Run the command in $(...) and read its output.
1368 */
1369static int
1370comsub(Expand *xp, const char *cp, int fn)
1371{
1372	Source *s, *sold;
1373	struct op *t;
1374	struct shf *shf;
1375	bool doalias = false;
1376	uint8_t old_utfmode = UTFMODE;
1377
1378	switch (fn) {
1379	case COMASUB:
1380		fn = COMSUB;
1381		if (0)
1382			/* FALLTHROUGH */
1383	case FUNASUB:
1384		  fn = FUNSUB;
1385		doalias = true;
1386	}
1387
1388	s = pushs(SSTRING, ATEMP);
1389	s->start = s->str = cp;
1390	sold = source;
1391	t = compile(s, true, doalias);
1392	afree(s, ATEMP);
1393	source = sold;
1394
1395	UTFMODE = old_utfmode;
1396
1397	if (t == NULL)
1398		return (XBASE);
1399
1400	/* no waitlast() unless specifically enabled later */
1401	xp->split = false;
1402
1403	if (t->type == TCOM &&
1404	    *t->args == NULL && *t->vars == NULL && t->ioact != NULL) {
1405		/* $(<file) */
1406		struct ioword *io = *t->ioact;
1407		char *name;
1408
1409		switch (io->ioflag & IOTYPE) {
1410		case IOREAD:
1411			shf = shf_open(name = evalstr(io->ioname, DOTILDE),
1412				O_RDONLY, 0, SHF_MAPHI | SHF_CLEXEC);
1413			if (shf == NULL)
1414				warningf(!Flag(FTALKING), Tf_sD_s_sD_s,
1415				    name, Tcant_open, "$(<...) input",
1416				    cstrerror(errno));
1417			break;
1418		case IOHERE:
1419			if (!herein(io, &name)) {
1420				xp->str = name;
1421				/* as $(…) requires, trim trailing newlines */
1422				name = strnul(name);
1423				while (name > xp->str && name[-1] == '\n')
1424					--name;
1425				*name = '\0';
1426				return (XSUB);
1427			}
1428			shf = NULL;
1429			break;
1430		default:
1431			errorf(Tf_sD_s, T_funny_command,
1432			    snptreef(NULL, 32, Tft_R, io));
1433		}
1434	} else if (fn == FUNSUB) {
1435		int ofd1;
1436		struct temp *tf = NULL;
1437
1438		/*
1439		 * create a temporary file, open for reading and writing,
1440		 * with an shf open for reading (buffered) but yet unused
1441		 */
1442		maketemp(ATEMP, TT_FUNSUB, &tf);
1443		if (!tf->shf) {
1444			errorf(Tf_temp,
1445			    Tcreate, tf->tffn, cstrerror(errno));
1446		}
1447		/* extract shf from temporary file, unlink and free it */
1448		shf = tf->shf;
1449		unlink(tf->tffn);
1450		afree(tf, ATEMP);
1451		/* save stdout and let it point to the tempfile */
1452		ofd1 = savefd(1);
1453		ksh_dup2(shf_fileno(shf), 1, false);
1454		/*
1455		 * run tree, with output thrown into the tempfile,
1456		 * in a new function block
1457		 */
1458		valsub(t, NULL);
1459		subst_exstat = exstat & 0xFF;
1460		/* rewind the tempfile and restore regular stdout */
1461		lseek(shf_fileno(shf), (off_t)0, SEEK_SET);
1462		restfd(1, ofd1);
1463	} else if (fn == VALSUB) {
1464		xp->str = valsub(t, ATEMP);
1465		subst_exstat = exstat & 0xFF;
1466		return (XSUB);
1467	} else {
1468		int ofd1, pv[2];
1469
1470		openpipe(pv);
1471		shf = shf_fdopen(pv[0], SHF_RD, NULL);
1472		ofd1 = savefd(1);
1473		if (pv[1] != 1) {
1474			ksh_dup2(pv[1], 1, false);
1475			close(pv[1]);
1476		}
1477		execute(t, XXCOM | XPIPEO | XFORK, NULL);
1478		restfd(1, ofd1);
1479		startlast();
1480		/* waitlast() */
1481		xp->split = true;
1482	}
1483
1484	xp->u.shf = shf;
1485	return (XCOM);
1486}
1487
1488/*
1489 * perform #pattern and %pattern substitution in ${}
1490 */
1491static char *
1492trimsub(char *str, char *pat, int how)
1493{
1494	char *end = strnul(str);
1495	char *p, c;
1496
1497	switch (how & (STYPE_CHAR | STYPE_DBL)) {
1498	case ORD('#'):
1499		/* shortest match at beginning */
1500		for (p = str; p <= end; p += utf_ptradj(p)) {
1501			c = *p; *p = '\0';
1502			if (gmatchx(str, pat, false)) {
1503				record_match(str);
1504				*p = c;
1505				return (p);
1506			}
1507			*p = c;
1508		}
1509		break;
1510	case ORD('#') | STYPE_DBL:
1511		/* longest match at beginning */
1512		for (p = end; p >= str; p--) {
1513			c = *p; *p = '\0';
1514			if (gmatchx(str, pat, false)) {
1515				record_match(str);
1516				*p = c;
1517				return (p);
1518			}
1519			*p = c;
1520		}
1521		break;
1522	case ORD('%'):
1523		/* shortest match at end */
1524		p = end;
1525		while (p >= str) {
1526			if (gmatchx(p, pat, false))
1527				goto trimsub_match;
1528			if (UTFMODE) {
1529				char *op = p;
1530				while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
1531					;
1532				if ((p < str) || (p + utf_ptradj(p) != op))
1533					p = op - 1;
1534			} else
1535				--p;
1536		}
1537		break;
1538	case ORD('%') | STYPE_DBL:
1539		/* longest match at end */
1540		for (p = str; p <= end; p++)
1541			if (gmatchx(p, pat, false)) {
1542 trimsub_match:
1543				record_match(p);
1544				strndupx(end, str, p - str, ATEMP);
1545				return (end);
1546			}
1547		break;
1548	}
1549
1550	/* no match, return string */
1551	return (str);
1552}
1553
1554/*
1555 * glob
1556 * Name derived from V6's /etc/glob, the program that expanded filenames.
1557 */
1558
1559/* XXX cp not const 'cause slashes are temporarily replaced with NULs... */
1560static void
1561glob(char *cp, XPtrV *wp, bool markdirs)
1562{
1563	int oldsize = XPsize(*wp);
1564
1565	if (glob_str(cp, wp, markdirs) == 0)
1566		XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
1567	else
1568		qsort(XPptrv(*wp) + oldsize, XPsize(*wp) - oldsize,
1569		    sizeof(void *), ascpstrcmp);
1570}
1571
/* flag bits for the check argument of globit() */
#define GF_NONE		0
#define GF_EXCHECK	BIT(0)		/* do existence check on file */
#define GF_GLOBBED	BIT(1)		/* some globbing has been done */
#define GF_MARKDIR	BIT(2)		/* add trailing / to directories */
1576
1577/*
1578 * Apply file globbing to cp and store the matching files in wp. Returns
1579 * the number of matches found.
1580 */
1581int
1582glob_str(char *cp, XPtrV *wp, bool markdirs)
1583{
1584	int oldsize = XPsize(*wp);
1585	XString xs;
1586	char *xp;
1587
1588	Xinit(xs, xp, 256, ATEMP);
1589	globit(&xs, &xp, cp, wp, markdirs ? GF_MARKDIR : GF_NONE);
1590	Xfree(xs, xp);
1591
1592	return (XPsize(*wp) - oldsize);
1593}
1594
/*
 * Recursive worker of glob_str(): expand the path sp one component
 * at a time. The path assembled so far is held in xs, ending at *xpp.
 * A component without globbing is copied over through debunk(); one
 * with globbing is matched against the entries of the directory named
 * by the prefix assembled so far, recursing for each entry that
 * matches. When sp is NULL the source path is used up and, subject
 * to the checks selected by the GF_* bits in check, a copy of the
 * assembled path is appended to wp.
 *
 * The separator ending the current component of sp is overwritten
 * with NUL while the component is processed and restored before
 * returning.
 */
static void
globit(XString *xs,	/* dest string */
    char **xpp,		/* ptr to dest end */
    char *sp,		/* source path */
    XPtrV *wp,		/* output list */
    int check)		/* GF_* flags */
{
	char *np;		/* next source component */
	char *xp = *xpp;
	char *se;		/* end of current source component */
	char odirsep;		/* separator replaced by NUL, if any */

	/* This to allow long expansions to be interrupted */
	intrcheck();

	if (sp == NULL) {
		/* end of source path */
		/*
		 * We only need to check if the file exists if a pattern
		 * is followed by a non-pattern (eg, foo*x/bar; no check
		 * is needed for foo* since the match must exist) or if
		 * any patterns were expanded and the markdirs option is set.
		 * Symlinks make things a bit tricky...
		 */
		if ((check & GF_EXCHECK) ||
		    ((check & GF_MARKDIR) && (check & GF_GLOBBED))) {
/* call stat() at most once, caching its outcome in stat_done */
#define stat_check()	(stat_done ? stat_done : (stat_done = \
			    stat(Xstring(*xs, xp), &statb) < 0 ? -1 : 1))
			struct stat lstatb, statb;
			/* -1: failed, 1 ok, 0 not yet done */
			int stat_done = 0;

			/* path does not exist: produce nothing */
			if (mksh_lstat(Xstring(*xs, xp), &lstatb) < 0)
				return;
			/*
			 * special case for systems which strip trailing
			 * slashes from regular files (eg, /etc/passwd/).
			 * SunOS 4.1.3 does this...
			 */
			if ((check & GF_EXCHECK) && xp > Xstring(*xs, xp) &&
			    mksh_cdirsep(xp[-1]) && !S_ISDIR(lstatb.st_mode) &&
			    (!S_ISLNK(lstatb.st_mode) ||
			    stat_check() < 0 || !S_ISDIR(statb.st_mode)))
				return;
			/*
			 * Possibly tack on a trailing / if there isn't already
			 * one and if the file is a directory or a symlink to a
			 * directory
			 */
			if (((check & GF_MARKDIR) && (check & GF_GLOBBED)) &&
			    xp > Xstring(*xs, xp) && !mksh_cdirsep(xp[-1]) &&
			    (S_ISDIR(lstatb.st_mode) ||
			    (S_ISLNK(lstatb.st_mode) && stat_check() > 0 &&
			    S_ISDIR(statb.st_mode)))) {
				*xp++ = '/';
				*xp = '\0';
			}
		}
		/* hand a copy of the assembled path to the output list */
		strndupx(np, Xstring(*xs, xp), Xlength(*xs, xp), ATEMP);
		XPput(*wp, np);
		return;
	}

	/* something was assembled already: separate the next component */
	if (xp > Xstring(*xs, xp))
		*xp++ = '/';
	/* copy leading directory separators of sp as they are */
	while (mksh_cdirsep(*sp)) {
		Xcheck(*xs, xp);
		*xp++ = *sp++;
	}
	/* cut off the current component at the next separator, if any */
	np = mksh_sdirsep(sp);
	if (np != NULL) {
		se = np;
		/* don't assume '/', can be multiple kinds */
		odirsep = *np;
		*np++ = '\0';
	} else {
		odirsep = '\0'; /* keep gcc quiet */
		se = strnul(sp);
	}


	/*
	 * Check if sp needs globbing - done to avoid pattern checks for strings
	 * containing MAGIC characters, open [s without the matching close ],
	 * etc. (otherwise opendir() will be called which may fail because the
	 * directory isn't readable - if no globbing is needed, only execute
	 * permission should be required (as per POSIX)).
	 */
	if (!has_globbing(sp)) {
		/* plain component: append it and go on with the rest */
		XcheckN(*xs, xp, se - sp + 1);
		debunk(xp, sp, Xnleft(*xs, xp));
		xp = strnul(xp);
		*xpp = xp;
		globit(xs, xpp, np, wp, check);
	} else {
		DIR *dirp;
		struct dirent *d;
		char *name;
		size_t len, prefix_len;

		/* xp = *xpp;	copy_non_glob() may have re-alloc'd xs */
		*xp = '\0';
		/*
		 * remember the prefix as a length, not a pointer; xp is
		 * recomputed from it after each recursive call below
		 */
		prefix_len = Xlength(*xs, xp);
		/* an empty prefix means the current directory */
		dirp = opendir(prefix_len ? Xstring(*xs, xp) : Tdot);
		if (dirp == NULL)
			goto Nodir;
		while ((d = readdir(dirp)) != NULL) {
			name = d->d_name;
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				/* always ignore . and .. */
				continue;
			/*
			 * names starting with a dot are skipped unless sp
			 * itself starts with a dot; all others must match
			 */
			if ((*name == '.' && *sp != '.') ||
			    !gmatchx(name, sp, true))
				continue;

			/* append the entry name, including its NUL */
			len = strlen(d->d_name) + 1;
			XcheckN(*xs, xp, len);
			memcpy(xp, name, len);
			*xpp = xp + len - 1;
			/* request an existence check if components remain */
			globit(xs, xpp, np, wp, (check & GF_MARKDIR) |
			    GF_GLOBBED | (np ? GF_EXCHECK : GF_NONE));
			xp = Xstring(*xs, xp) + prefix_len;
		}
		closedir(dirp);
 Nodir:
		;
	}

	/* put back the separator that was overwritten with NUL */
	if (np != NULL)
		*--np = odirsep;
}
1727
1728/* remove MAGIC from string */
1729char *
1730debunk(char *dp, const char *sp, size_t dlen)
1731{
1732	char *d;
1733	const char *s;
1734
1735	if ((s = cstrchr(sp, MAGIC))) {
1736		if (s - sp >= (ssize_t)dlen)
1737			return (dp);
1738		memmove(dp, sp, s - sp);
1739		for (d = dp + (s - sp); *s && (d - dp < (ssize_t)dlen); s++)
1740			if (!ISMAGIC(*s) || !(*++s & 0x80) ||
1741			    !ctype(*s & 0x7F, C_PATMO | C_SPC))
1742				*d++ = *s;
1743			else {
1744				/* extended pattern operators: *+?@! */
1745				if ((*s & 0x7f) != ' ')
1746					*d++ = *s & 0x7f;
1747				if (d - dp < (ssize_t)dlen)
1748					*d++ = '(';
1749			}
1750		*d = '\0';
1751	} else if (dp != sp)
1752		strlcpy(dp, sp, dlen);
1753	return (dp);
1754}
1755
1756/*
1757 * Check if p is an unquoted name, possibly followed by a / or :. If so
1758 * puts the expanded version in *dcp,dp and returns a pointer in p just
1759 * past the name, otherwise returns 0.
1760 */
1761static const char *
1762maybe_expand_tilde(const char *p, XString *dsp, char **dpp, bool isassign)
1763{
1764	XString ts;
1765	char *dp = *dpp;
1766	char *tp;
1767	const char *r;
1768
1769	Xinit(ts, tp, 16, ATEMP);
1770	/* : only for DOASNTILDE form */
1771	while (p[0] == CHAR && /* not cdirsep */ p[1] != '/' &&
1772	    (!isassign || p[1] != ':')) {
1773		Xcheck(ts, tp);
1774		*tp++ = p[1];
1775		p += 2;
1776	}
1777	*tp = '\0';
1778	r = (p[0] == EOS || p[0] == CHAR || p[0] == CSUBST) ?
1779	    do_tilde(Xstring(ts, tp)) : NULL;
1780	Xfree(ts, tp);
1781	if (r) {
1782		while (*r) {
1783			Xcheck(*dsp, dp);
1784			if (ISMAGIC(*r))
1785				*dp++ = MAGIC;
1786			*dp++ = *r++;
1787		}
1788		*dpp = dp;
1789		r = p;
1790	}
1791	return (r);
1792}
1793
1794/*
1795 * tilde expansion
1796 *
1797 * based on a version by Arnold Robbins
1798 */
1799char *
1800do_tilde(char *cp)
1801{
1802	char *dp = null;
1803#ifndef MKSH_NOPWNAM
1804	bool do_simplify = true;
1805#endif
1806
1807	if (cp[0] == '\0')
1808		dp = str_val(global("HOME"));
1809	else if (cp[0] == '+' && cp[1] == '\0')
1810		dp = str_val(global(TPWD));
1811	else if (ksh_isdash(cp))
1812		dp = str_val(global(TOLDPWD));
1813#ifndef MKSH_NOPWNAM
1814	else {
1815		dp = homedir(cp);
1816		do_simplify = false;
1817	}
1818#endif
1819
1820	/* if parameters aren't set, don't expand ~ */
1821	if (dp == NULL || dp == null)
1822		return (NULL);
1823
1824	/* simplify parameters as if cwd upon entry */
1825#ifndef MKSH_NOPWNAM
1826	if (do_simplify)
1827#endif
1828	  {
1829		strdupx(dp, dp, ATEMP);
1830		simplify_path(dp);
1831	}
1832	return (dp);
1833}
1834
1835#ifndef MKSH_NOPWNAM
1836/*
1837 * map userid to user's home directory.
1838 * note that 4.3's getpw adds more than 6K to the shell,
1839 * and the YP version probably adds much more.
1840 * we might consider our own version of getpwnam() to keep the size down.
1841 */
1842static char *
1843homedir(char *name)
1844{
1845	struct tbl *ap;
1846
1847	ap = ktenter(&homedirs, name, hash(name));
1848	if (!(ap->flag & ISSET)) {
1849		struct passwd *pw;
1850
1851		pw = getpwnam(name);
1852		if (pw == NULL)
1853			return (NULL);
1854		strdupx(ap->val.s, pw->pw_dir, APERM);
1855		ap->flag |= DEFINED|ISSET|ALLOC;
1856	}
1857	return (ap->val.s);
1858}
1859#endif
1860
1861static void
1862alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
1863{
1864	unsigned int count = 0;
1865	char *brace_start, *brace_end, *comma = NULL;
1866	char *field_start;
1867	char *p = exp_start;
1868
1869	/* search for open brace */
1870	while ((p = strchr(p, MAGIC)) && ord(p[1]) != ORD('{' /*}*/))
1871		p += 2;
1872	brace_start = p;
1873
1874	/* find matching close brace, if any */
1875	if (p) {
1876		comma = NULL;
1877		count = 1;
1878		p += 2;
1879		while (*p && count) {
1880			if (ISMAGIC(*p++)) {
1881				if (ord(*p) == ORD('{' /*}*/))
1882					++count;
1883				else if (ord(*p) == ORD(/*{*/ '}'))
1884					--count;
1885				else if (*p == ',' && count == 1)
1886					comma = p;
1887				++p;
1888			}
1889		}
1890	}
1891	/* no valid expansions... */
1892	if (!p || count != 0) {
1893		/*
1894		 * Note that given a{{b,c} we do not expand anything (this is
1895		 * what AT&T ksh does. This may be changed to do the {b,c}
1896		 * expansion. }
1897		 */
1898		if (fdo & DOGLOB)
1899			glob(start, wp, tobool(fdo & DOMARKDIRS));
1900		else
1901			XPput(*wp, debunk(start, start, end - start));
1902		return;
1903	}
1904	brace_end = p;
1905	if (!comma) {
1906		alt_expand(wp, start, brace_end, end, fdo);
1907		return;
1908	}
1909
1910	/* expand expression */
1911	field_start = brace_start + 2;
1912	count = 1;
1913	for (p = brace_start + 2; p != brace_end; p++) {
1914		if (ISMAGIC(*p)) {
1915			if (ord(*++p) == ORD('{' /*}*/))
1916				++count;
1917			else if ((ord(*p) == ORD(/*{*/ '}') && --count == 0) ||
1918			    (*p == ',' && count == 1)) {
1919				char *news;
1920				int l1, l2, l3;
1921
1922				/*
1923				 * addition safe since these operate on
1924				 * one string (separate substrings)
1925				 */
1926				l1 = brace_start - start;
1927				l2 = (p - 1) - field_start;
1928				l3 = end - brace_end;
1929				news = alloc(l1 + l2 + l3 + 1, ATEMP);
1930				memcpy(news, start, l1);
1931				memcpy(news + l1, field_start, l2);
1932				memcpy(news + l1 + l2, brace_end, l3);
1933				news[l1 + l2 + l3] = '\0';
1934				alt_expand(wp, news, news + l1,
1935				    news + l1 + l2 + l3, fdo);
1936				field_start = p + 1;
1937			}
1938		}
1939	}
1940	return;
1941}
1942
/*
 * helper function due to setjmp/longjmp woes
 *
 * Execute the tree t inside a new E_FUNC environment and a new block.
 * If ap is not NULL, a local REPLY is created beforehand and a copy
 * of its value, allocated in ap, is returned afterwards; with ap
 * NULL the return value is NULL.
 */
static char *
valsub(struct op *t, Area *ap)
{
	/* volatile: both are used after the kshsetjmp() below */
	char * volatile cp = NULL;
	struct tbl * volatile vp = NULL;

	newenv(E_FUNC);
	newblock();
	if (ap)
		vp = local("REPLY", false);
	/*
	 * run the tree on the direct return of kshsetjmp() only; a jump
	 * back to e->jbuf (presumably out of execute()) resumes below
	 */
	if (!kshsetjmp(e->jbuf))
		execute(t, XXCOM | XERROK, NULL);
	/* fetch the value before quitenv() leaves the environment */
	if (vp)
		strdupx(cp, str_val(vp), ap);
	quitenv(NULL);

	return (cp);
}
1962