1/*	$OpenBSD: eval.c,v 1.40 2013/09/14 20:09:30 millert Exp $	*/
2
3/*-
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 *		 2011, 2012, 2013, 2014, 2015, 2016
6 *	mirabilos <m@mirbsd.org>
7 *
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
13 *
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
22 */
23
24#include "sh.h"
25
26__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.194 2016/11/11 23:31:34 tg Exp $");
27
28/*
29 * string expansion
30 *
31 * first pass: quoting, IFS separation, ~, ${}, $() and $(()) substitution.
32 * second pass: alternation ({,}), filename expansion (*?[]).
33 */
34
/*
 * expansion generator state: describes where expand() takes its next
 * input character from while a substitution is being expanded
 */
typedef struct {
	/* not including an "int type;" member, see expand() */
	/* string currently being read (XSUB, XSUBMID, XARG) */
	const char *str;
	/* source */
	union {
		/* string[]: remaining words of $*, $@ or an array (XARG) */
		const char **strv;
		/* file: output of a command substitution (XCOM) */
		struct shf *shf;
	} u;
	/* variable in ${var...} */
	struct tbl *var;
	/* split "$@" / call waitlast in $() */
	bool split;
} Expand;
52
/* Expansion types: values of "type" in expand(), set from varsub()/comsub() */
#define	XBASE		0	/* scanning original */
#define	XSUB		1	/* expanding ${} string */
#define	XARGSEP		2	/* ifs0 between "$*" */
#define	XARG		3	/* expanding $*, $@ */
#define	XCOM		4	/* expanding $() */
#define XNULLSUB	5	/* "$@" when $# is 0 (don't generate word) */
#define XSUBMID		6	/* middle of expanding ${} */

/* States used for field splitting (values of "word" in expand()) */
#define IFS_WORD	0	/* word has chars (or quotes except "$@") */
#define IFS_WS		1	/* have seen IFS white-space */
#define IFS_NWS		2	/* have seen IFS non-white-space */
#define IFS_IWS		3	/* beginning of word, ignore IFS WS */
#define IFS_QUOTE	4	/* beg.w/quote, become IFS_WORD unless "$@" */
67
/* forward declarations of the file-local helpers */
static int varsub(Expand *, const char *, const char *, int *, int *);
static int comsub(Expand *, const char *, int);
static char *valsub(struct op *, Area *);
static char *trimsub(char *, char *, int);
static void glob(char *, XPtrV *, bool);
static void globit(XString *, char **, char *, XPtrV *, int);
static const char *maybe_expand_tilde(const char *, XString *, char **, bool);
/* homedir() is only declared when MKSH_NOPWNAM is not defined */
#ifndef MKSH_NOPWNAM
static char *homedir(char *);
#endif
static void alt_expand(XPtrV *, char *, char *, char *, int);
static int utflen(const char *) MKSH_A_PURE;
static void utfincptr(const char *, mksh_ari_t *);
81
82/* UTFMODE functions */
83static int
84utflen(const char *s)
85{
86	size_t n;
87
88	if (UTFMODE) {
89		n = 0;
90		while (*s) {
91			s += utf_ptradj(s);
92			++n;
93		}
94	} else
95		n = strlen(s);
96
97	if (n > 2147483647)
98		n = 2147483647;
99	return ((int)n);
100}
101
102static void
103utfincptr(const char *s, mksh_ari_t *lp)
104{
105	const char *cp = s;
106
107	while ((*lp)--)
108		cp += utf_ptradj(cp);
109	*lp = cp - s;
110}
111
112/* compile and expand word */
113char *
114substitute(const char *cp, int f)
115{
116	struct source *s, *sold;
117
118	sold = source;
119	s = pushs(SWSTR, ATEMP);
120	s->start = s->str = cp;
121	source = s;
122	if (yylex(ONEWORD) != LWORD)
123		internal_errorf(Tbadsubst);
124	source = sold;
125	afree(s, ATEMP);
126	return (evalstr(yylval.cp, f));
127}
128
129/*
130 * expand arg-list
131 */
132char **
133eval(const char **ap, int f)
134{
135	XPtrV w;
136
137	if (*ap == NULL) {
138		union mksh_ccphack vap;
139
140		vap.ro = ap;
141		return (vap.rw);
142	}
143	XPinit(w, 32);
144	/* space for shell name */
145	XPput(w, NULL);
146	while (*ap != NULL)
147		expand(*ap++, &w, f);
148	XPput(w, NULL);
149	return ((char **)XPclose(w) + 1);
150}
151
152/*
153 * expand string
154 */
155char *
156evalstr(const char *cp, int f)
157{
158	XPtrV w;
159	char *dp = null;
160
161	XPinit(w, 1);
162	expand(cp, &w, f);
163	if (XPsize(w))
164		dp = *XPptrv(w);
165	XPfree(w);
166	return (dp);
167}
168
169/*
170 * expand string - return only one component
171 * used from iosetup to expand redirection files
172 */
173char *
174evalonestr(const char *cp, int f)
175{
176	XPtrV w;
177	char *rv;
178
179	XPinit(w, 1);
180	expand(cp, &w, f);
181	switch (XPsize(w)) {
182	case 0:
183		rv = null;
184		break;
185	case 1:
186		rv = (char *) *XPptrv(w);
187		break;
188	default:
189		rv = evalstr(cp, f & ~DOGLOB);
190		break;
191	}
192	XPfree(w);
193	return (rv);
194}
195
/*
 * for nested substitution: ${var:=$var2}
 * expand() keeps one of these per open ${...} whose word part is being
 * expanded, chained through prev/next starting at a list head
 */
typedef struct SubType {
	struct tbl *var;	/* variable for ${var..} */
	struct SubType *prev;	/* old type */
	struct SubType *next;	/* popped type (to avoid re-allocating) */
	size_t	base;		/* start position of expanded word */
	short	stype;		/* [=+-?%#] action after expanded word */
	short	f;		/* saved value of f (DOPAT, etc) */
	uint8_t	quotep;		/* saved value of quote (for ${..[%#]..}) */
	uint8_t	quotew;		/* saved value of quote (for ${..[+-=]..}) */
} SubType;
207
208void
209expand(
210    /* input word */
211    const char *ccp,
212    /* output words */
213    XPtrV *wp,
214    /* DO* flags */
215    int f)
216{
217	int c = 0;
218	/* expansion type */
219	int type;
220	/* quoted */
221	int quote = 0;
222	/* destination string and live pointer */
223	XString ds;
224	char *dp;
225	/* source */
226	const char *sp;
227	/* second pass flags */
228	int fdo;
229	/* have word */
230	int word;
231	/* field splitting of parameter/command substitution */
232	int doblank;
233	/* expansion variables */
234	Expand x = {
235		NULL, { NULL }, NULL, 0
236	};
237	SubType st_head, *st;
238	/* record number of trailing newlines in COMSUB */
239	int newlines = 0;
240	bool saw_eq, make_magic;
241	unsigned int tilde_ok;
242	size_t len;
243	char *cp;
244
245	if (ccp == NULL)
246		internal_errorf("expand(NULL)");
247	/* for alias, readonly, set, typeset commands */
248	if ((f & DOVACHECK) && is_wdvarassign(ccp)) {
249		f &= ~(DOVACHECK | DOBLANK | DOGLOB | DOTILDE);
250		f |= DOASNTILDE | DOSCALAR;
251	}
252	if (Flag(FNOGLOB))
253		f &= ~DOGLOB;
254	if (Flag(FMARKDIRS))
255		f |= DOMARKDIRS;
256	if (Flag(FBRACEEXPAND) && (f & DOGLOB))
257		f |= DOBRACE;
258
259	/* init destination string */
260	Xinit(ds, dp, 128, ATEMP);
261	type = XBASE;
262	sp = ccp;
263	fdo = 0;
264	saw_eq = false;
265	/* must be 1/0 */
266	tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
267	doblank = 0;
268	make_magic = false;
269	word = (f&DOBLANK) ? IFS_WS : IFS_WORD;
270	/* clang doesn't know OSUBST comes before CSUBST */
271	memset(&st_head, 0, sizeof(st_head));
272	st = &st_head;
273
274	while (/* CONSTCOND */ 1) {
275		Xcheck(ds, dp);
276
277		switch (type) {
278		case XBASE:
279			/* original prefixed string */
280			c = *sp++;
281			switch (c) {
282			case EOS:
283				c = 0;
284				break;
285			case CHAR:
286				c = *sp++;
287				break;
288			case QCHAR:
289				/* temporary quote */
290				quote |= 2;
291				c = *sp++;
292				break;
293			case OQUOTE:
294				if (word != IFS_WORD)
295					word = IFS_QUOTE;
296				tilde_ok = 0;
297				quote = 1;
298				continue;
299			case CQUOTE:
300				if (word == IFS_QUOTE)
301					word = IFS_WORD;
302				quote = st->quotew;
303				continue;
304			case COMSUB:
305			case FUNSUB:
306			case VALSUB:
307				tilde_ok = 0;
308				if (f & DONTRUNCOMMAND) {
309					word = IFS_WORD;
310					*dp++ = '$';
311					*dp++ = c == COMSUB ? '(' : '{';
312					if (c != COMSUB)
313						*dp++ = c == FUNSUB ? ' ' : '|';
314					while (*sp != '\0') {
315						Xcheck(ds, dp);
316						*dp++ = *sp++;
317					}
318					if (c != COMSUB) {
319						*dp++ = ';';
320						*dp++ = '}';
321					} else
322						*dp++ = ')';
323				} else {
324					type = comsub(&x, sp, c);
325					if (type != XBASE && (f & DOBLANK))
326						doblank++;
327					sp = strnul(sp) + 1;
328					newlines = 0;
329				}
330				continue;
331			case EXPRSUB:
332				tilde_ok = 0;
333				if (f & DONTRUNCOMMAND) {
334					word = IFS_WORD;
335					*dp++ = '$'; *dp++ = '('; *dp++ = '(';
336					while (*sp != '\0') {
337						Xcheck(ds, dp);
338						*dp++ = *sp++;
339					}
340					*dp++ = ')'; *dp++ = ')';
341				} else {
342					struct tbl v;
343
344					v.flag = DEFINED|ISSET|INTEGER;
345					/* not default */
346					v.type = 10;
347					v.name[0] = '\0';
348					v_evaluate(&v, substitute(sp, 0),
349					    KSH_UNWIND_ERROR, true);
350					sp = strnul(sp) + 1;
351					x.str = str_val(&v);
352					type = XSUB;
353					if (f & DOBLANK)
354						doblank++;
355				}
356				continue;
357			case OSUBST: {
358				/* ${{#}var{:}[=+-?#%]word} */
359			/*-
360			 * format is:
361			 *	OSUBST [{x] plain-variable-part \0
362			 *	    compiled-word-part CSUBST [}x]
363			 * This is where all syntax checking gets done...
364			 */
365				/* skip the { or x (}) */
366				const char *varname = ++sp;
367				int stype;
368				int slen = 0;
369
370				/* skip variable */
371				sp = cstrchr(sp, '\0') + 1;
372				type = varsub(&x, varname, sp, &stype, &slen);
373				if (type < 0) {
374					char *beg, *end, *str;
375 unwind_substsyn:
376					/* restore sp */
377					sp = varname - 2;
378					beg = wdcopy(sp, ATEMP);
379					end = (wdscan(cstrchr(sp, '\0') + 1,
380					    CSUBST) - sp) + beg;
381					/* ({) the } or x is already skipped */
382					if (end < wdscan(beg, EOS))
383						*end = EOS;
384					str = snptreef(NULL, 64, Tf_S, beg);
385					afree(beg, ATEMP);
386					errorf(Tf_sD_s, str, Tbadsubst);
387				}
388				if (f & DOBLANK)
389					doblank++;
390				tilde_ok = 0;
391				if (word == IFS_QUOTE && type != XNULLSUB)
392					word = IFS_WORD;
393				if (type == XBASE) {
394					/* expand? */
395					if (!st->next) {
396						SubType *newst;
397
398						newst = alloc(sizeof(SubType), ATEMP);
399						newst->next = NULL;
400						newst->prev = st;
401						st->next = newst;
402					}
403					st = st->next;
404					st->stype = stype;
405					st->base = Xsavepos(ds, dp);
406					st->f = f;
407					if (x.var == vtemp) {
408						st->var = tempvar(vtemp->name);
409						st->var->flag &= ~INTEGER;
410						/* can't fail here */
411						setstr(st->var,
412						    str_val(x.var),
413						    KSH_RETURN_ERROR | 0x4);
414					} else
415						st->var = x.var;
416
417					st->quotew = st->quotep = quote;
418					/* skip qualifier(s) */
419					if (stype)
420						sp += slen;
421					switch (stype & 0x17F) {
422					case 0x100 | '#':
423						x.str = shf_smprintf("%08X",
424						    (unsigned int)hash(str_val(st->var)));
425						break;
426					case 0x100 | 'Q': {
427						struct shf shf;
428
429						shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
430						print_value_quoted(&shf, str_val(st->var));
431						x.str = shf_sclose(&shf);
432						break;
433					    }
434					case '0': {
435						char *beg, *mid, *end, *stg;
436						mksh_ari_t from = 0, num = -1, flen, finc = 0;
437
438						beg = wdcopy(sp, ATEMP);
439						mid = beg + (wdscan(sp, ADELIM) - sp);
440						stg = beg + (wdscan(sp, CSUBST) - sp);
441						mid[-2] = EOS;
442						if (mid[-1] == /*{*/'}') {
443							sp += mid - beg - 1;
444							end = NULL;
445						} else {
446							end = mid +
447							    (wdscan(mid, ADELIM) - mid);
448							if (end[-1] != /*{*/ '}')
449								/* more than max delimiters */
450								goto unwind_substsyn;
451							end[-2] = EOS;
452							sp += end - beg - 1;
453						}
454						evaluate(substitute(stg = wdstrip(beg, 0), 0),
455						    &from, KSH_UNWIND_ERROR, true);
456						afree(stg, ATEMP);
457						if (end) {
458							evaluate(substitute(stg = wdstrip(mid, 0), 0),
459							    &num, KSH_UNWIND_ERROR, true);
460							afree(stg, ATEMP);
461						}
462						afree(beg, ATEMP);
463						beg = str_val(st->var);
464						flen = utflen(beg);
465						if (from < 0) {
466							if (-from < flen)
467								finc = flen + from;
468						} else
469							finc = from < flen ? from : flen;
470						if (UTFMODE)
471							utfincptr(beg, &finc);
472						beg += finc;
473						flen = utflen(beg);
474						if (num < 0 || num > flen)
475							num = flen;
476						if (UTFMODE)
477							utfincptr(beg, &num);
478						strndupx(x.str, beg, num, ATEMP);
479						goto do_CSUBST;
480					    }
481					case 0x100 | '/':
482					case '/': {
483						char *s, *p, *d, *sbeg, *end;
484						char *pat = NULL, *rrep = null;
485						char fpat = 0, *tpat1, *tpat2;
486						char *ws, *wpat, *wrep;
487
488						s = ws = wdcopy(sp, ATEMP);
489						p = s + (wdscan(sp, ADELIM) - sp);
490						d = s + (wdscan(sp, CSUBST) - sp);
491						p[-2] = EOS;
492						if (p[-1] == /*{*/'}')
493							d = NULL;
494						else
495							d[-2] = EOS;
496						sp += (d ? d : p) - s - 1;
497						if (!(stype & 0x180) &&
498						    s[0] == CHAR &&
499						    (s[1] == '#' || s[1] == '%'))
500							fpat = s[1];
501						wpat = s + (fpat ? 2 : 0);
502						wrep = d ? p : NULL;
503						if (!(stype & 0x100)) {
504							rrep = wrep ? evalstr(wrep,
505							    DOTILDE | DOSCALAR) :
506							    null;
507						}
508
509						/* prepare string on which to work */
510						strdupx(s, str_val(st->var), ATEMP);
511						sbeg = s;
512 again_search:
513						pat = evalstr(wpat,
514						    DOTILDE | DOSCALAR | DOPAT);
515						/* check for special cases */
516						if (!*pat && !fpat) {
517							/*
518							 * empty unanchored
519							 * pattern => reject
520							 */
521							goto no_repl;
522						}
523						if ((stype & 0x180) &&
524						    gmatchx(null, pat, false)) {
525							/*
526							 * pattern matches empty
527							 * string => don't loop
528							 */
529							stype &= ~0x180;
530						}
531
532						/* first see if we have any match at all */
533						if (fpat == '#') {
534							/* anchor at the beginning */
535							tpat1 = shf_smprintf("%s%c*", pat, MAGIC);
536							tpat2 = tpat1;
537						} else if (fpat == '%') {
538							/* anchor at the end */
539							tpat1 = shf_smprintf("%c*%s", MAGIC, pat);
540							tpat2 = pat;
541						} else {
542							/* float */
543							tpat1 = shf_smprintf("%c*%s%c*", MAGIC, pat, MAGIC);
544							tpat2 = tpat1 + 2;
545						}
546 again_repl:
547						/*
548						 * this would not be necessary if gmatchx would return
549						 * the start and end values of a match found, like re*
550						 */
551						if (!gmatchx(sbeg, tpat1, false))
552							goto end_repl;
553						end = strnul(s);
554						/* now anchor the beginning of the match */
555						if (fpat != '#')
556							while (sbeg <= end) {
557								if (gmatchx(sbeg, tpat2, false))
558									break;
559								else
560									sbeg++;
561							}
562						/* now anchor the end of the match */
563						p = end;
564						if (fpat != '%')
565							while (p >= sbeg) {
566								bool gotmatch;
567
568								c = *p;
569								*p = '\0';
570								gotmatch = tobool(gmatchx(sbeg, pat, false));
571								*p = c;
572								if (gotmatch)
573									break;
574								p--;
575							}
576						strndupx(end, sbeg, p - sbeg, ATEMP);
577						record_match(end);
578						afree(end, ATEMP);
579						if (stype & 0x100) {
580							if (rrep != null)
581								afree(rrep, ATEMP);
582							rrep = wrep ? evalstr(wrep,
583							    DOTILDE | DOSCALAR) :
584							    null;
585						}
586						strndupx(end, s, sbeg - s, ATEMP);
587						d = shf_smprintf(Tf_sss, end, rrep, p);
588						afree(end, ATEMP);
589						sbeg = d + (sbeg - s) + strlen(rrep);
590						afree(s, ATEMP);
591						s = d;
592						if (stype & 0x100) {
593							afree(tpat1, ATEMP);
594							afree(pat, ATEMP);
595							goto again_search;
596						} else if (stype & 0x80)
597							goto again_repl;
598 end_repl:
599						afree(tpat1, ATEMP);
600						x.str = s;
601 no_repl:
602						afree(pat, ATEMP);
603						if (rrep != null)
604							afree(rrep, ATEMP);
605						afree(ws, ATEMP);
606						goto do_CSUBST;
607					    }
608					case '#':
609					case '%':
610						/* ! DOBLANK,DOBRACE */
611						f = (f & DONTRUNCOMMAND) |
612						    DOPAT | DOTILDE |
613						    DOTEMP | DOSCALAR;
614						tilde_ok = 1;
615						st->quotew = quote = 0;
616						/*
617						 * Prepend open pattern (so |
618						 * in a trim will work as
619						 * expected)
620						 */
621						if (!Flag(FSH)) {
622							*dp++ = MAGIC;
623							*dp++ = 0x80 | '@';
624						}
625						break;
626					case '=':
627						/*
628						 * Enabling tilde expansion
629						 * after :s here is
630						 * non-standard ksh, but is
631						 * consistent with rules for
632						 * other assignments. Not
633						 * sure what POSIX thinks of
634						 * this.
635						 * Not doing tilde expansion
636						 * for integer variables is a
637						 * non-POSIX thing - makes
638						 * sense though, since ~ is
639						 * a arithmetic operator.
640						 */
641						if (!(x.var->flag & INTEGER))
642							f |= DOASNTILDE | DOTILDE;
643						f |= DOTEMP;
644						/*
645						 * These will be done after the
646						 * value has been assigned.
647						 */
648						f &= ~(DOBLANK|DOGLOB|DOBRACE);
649						tilde_ok = 1;
650						break;
651					case '?':
652						if (*sp == CSUBST)
653							errorf("%s: parameter null or not set",
654							    st->var->name);
655						f &= ~DOBLANK;
656						f |= DOTEMP;
657						/* FALLTHROUGH */
658					default:
659						/* '-' '+' '?' */
660						if (quote)
661							word = IFS_WORD;
662						else if (dp == Xstring(ds, dp))
663							word = IFS_IWS;
664						/* Enable tilde expansion */
665						tilde_ok = 1;
666						f |= DOTILDE;
667					}
668				} else
669					/* skip word */
670					sp += wdscan(sp, CSUBST) - sp;
671				continue;
672			    }
673			case CSUBST:
674				/* only get here if expanding word */
675 do_CSUBST:
676				/* ({) skip the } or x */
677				sp++;
678				/* in case of ${unset:-} */
679				tilde_ok = 0;
680				*dp = '\0';
681				quote = st->quotep;
682				f = st->f;
683				if (f & DOBLANK)
684					doblank--;
685				switch (st->stype & 0x17F) {
686				case '#':
687				case '%':
688					if (!Flag(FSH)) {
689						/* Append end-pattern */
690						*dp++ = MAGIC;
691						*dp++ = ')';
692					}
693					*dp = '\0';
694					dp = Xrestpos(ds, dp, st->base);
695					/*
696					 * Must use st->var since calling
697					 * global would break things
698					 * like x[i+=1].
699					 */
700					x.str = trimsub(str_val(st->var),
701						dp, st->stype);
702					if (x.str[0] != '\0') {
703						word = IFS_IWS;
704						type = XSUB;
705					} else if (quote) {
706						word = IFS_WORD;
707						type = XSUB;
708					} else {
709						if (dp == Xstring(ds, dp))
710							word = IFS_IWS;
711						type = XNULLSUB;
712					}
713					if (f & DOBLANK)
714						doblank++;
715					st = st->prev;
716					continue;
717				case '=':
718					/*
719					 * Restore our position and substitute
720					 * the value of st->var (may not be
721					 * the assigned value in the presence
722					 * of integer/right-adj/etc attributes).
723					 */
724					dp = Xrestpos(ds, dp, st->base);
725					/*
726					 * Must use st->var since calling
727					 * global would cause with things
728					 * like x[i+=1] to be evaluated twice.
729					 */
730					/*
731					 * Note: not exported by FEXPORT
732					 * in AT&T ksh.
733					 */
734					/*
735					 * XXX POSIX says readonly is only
736					 * fatal for special builtins (setstr
737					 * does readonly check).
738					 */
739					len = strlen(dp) + 1;
740					setstr(st->var,
741					    debunk(alloc(len, ATEMP),
742					    dp, len), KSH_UNWIND_ERROR);
743					x.str = str_val(st->var);
744					type = XSUB;
745					if (f & DOBLANK)
746						doblank++;
747					st = st->prev;
748					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
749					continue;
750				case '?':
751					dp = Xrestpos(ds, dp, st->base);
752
753					errorf(Tf_sD_s, st->var->name,
754					    debunk(dp, dp, strlen(dp) + 1));
755					break;
756				case '0':
757				case 0x100 | '/':
758				case '/':
759				case 0x100 | '#':
760				case 0x100 | 'Q':
761					dp = Xrestpos(ds, dp, st->base);
762					type = XSUB;
763					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
764					if (f & DOBLANK)
765						doblank++;
766					st = st->prev;
767					continue;
768				/* default: '-' '+' */
769				}
770				st = st->prev;
771				type = XBASE;
772				continue;
773
774			case OPAT:
775				/* open pattern: *(foo|bar) */
776				/* Next char is the type of pattern */
777				make_magic = true;
778				c = *sp++ | 0x80;
779				break;
780
781			case SPAT:
782				/* pattern separator (|) */
783				make_magic = true;
784				c = '|';
785				break;
786
787			case CPAT:
788				/* close pattern */
789				make_magic = true;
790				c = /*(*/ ')';
791				break;
792			}
793			break;
794
795		case XNULLSUB:
796			/*
797			 * Special case for "$@" (and "${foo[@]}") - no
798			 * word is generated if $# is 0 (unless there is
799			 * other stuff inside the quotes).
800			 */
801			type = XBASE;
802			if (f & DOBLANK) {
803				doblank--;
804				if (dp == Xstring(ds, dp) && word != IFS_WORD)
805					word = IFS_IWS;
806			}
807			continue;
808
809		case XSUB:
810		case XSUBMID:
811			if ((c = *x.str++) == 0) {
812				type = XBASE;
813				if (f & DOBLANK)
814					doblank--;
815				continue;
816			}
817			break;
818
819		case XARGSEP:
820			type = XARG;
821			quote = 1;
822			/* FALLTHROUGH */
823		case XARG:
824			if ((c = *x.str++) == '\0') {
825				/*
826				 * force null words to be created so
827				 * set -- "" 2 ""; echo "$@" will do
828				 * the right thing
829				 */
830				if (quote && x.split)
831					word = IFS_WORD;
832				if ((x.str = *x.u.strv++) == NULL) {
833					type = XBASE;
834					if (f & DOBLANK)
835						doblank--;
836					continue;
837				}
838				c = ifs0;
839				if ((f & DOHEREDOC)) {
840					/* pseudo-field-split reliably */
841					if (c == 0)
842						c = ' ';
843					break;
844				}
845				if ((f & DOSCALAR)) {
846					/* do not field-split */
847					if (x.split) {
848						c = ' ';
849						break;
850					}
851					if (c == 0)
852						continue;
853				}
854				if (c == 0) {
855					if (quote && !x.split)
856						continue;
857					if (!quote && word == IFS_WS)
858						continue;
859					/* this is so we don't terminate */
860					c = ' ';
861					/* now force-emit a word */
862					goto emit_word;
863				}
864				if (quote && x.split) {
865					/* terminate word for "$@" */
866					type = XARGSEP;
867					quote = 0;
868				}
869			}
870			break;
871
872		case XCOM:
873			if (x.u.shf == NULL) {
874				/* $(<...) failed */
875				subst_exstat = 1;
876				/* fake EOF */
877				c = -1;
878			} else if (newlines) {
879				/* spit out saved NLs */
880				c = '\n';
881				--newlines;
882			} else {
883				while ((c = shf_getc(x.u.shf)) == 0 || c == '\n')
884					if (c == '\n')
885						/* save newlines */
886						newlines++;
887				if (newlines && c != -1) {
888					shf_ungetc(c, x.u.shf);
889					c = '\n';
890					--newlines;
891				}
892			}
893			if (c == -1) {
894				newlines = 0;
895				if (x.u.shf)
896					shf_close(x.u.shf);
897				if (x.split)
898					subst_exstat = waitlast();
899				type = XBASE;
900				if (f & DOBLANK)
901					doblank--;
902				continue;
903			}
904			break;
905		}
906
907		/* check for end of word or IFS separation */
908		if (c == 0 || (!quote && (f & DOBLANK) && doblank &&
909		    !make_magic && ctype(c, C_IFS))) {
910			/*-
911			 * How words are broken up:
912			 *			|	value of c
913			 *	word		|	ws	nws	0
914			 *	-----------------------------------
915			 *	IFS_WORD		w/WS	w/NWS	w
916			 *	IFS_WS			-/WS	-/NWS	-
917			 *	IFS_NWS			-/NWS	w/NWS	-
918			 *	IFS_IWS			-/WS	w/NWS	-
919			 * (w means generate a word)
920			 */
921			if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c &&
922			    (word == IFS_IWS || word == IFS_NWS) &&
923			    !ctype(c, C_IFSWS))) {
924 emit_word:
925				if (f & DOHERESTR)
926					*dp++ = '\n';
927				*dp++ = '\0';
928				cp = Xclose(ds, dp);
929				if (fdo & DOBRACE)
930					/* also does globbing */
931					alt_expand(wp, cp, cp,
932					    cp + Xlength(ds, (dp - 1)),
933					    fdo | (f & DOMARKDIRS));
934				else if (fdo & DOGLOB)
935					glob(cp, wp, tobool(f & DOMARKDIRS));
936				else if ((f & DOPAT) || !(fdo & DOMAGIC))
937					XPput(*wp, cp);
938				else
939					XPput(*wp, debunk(cp, cp,
940					    strlen(cp) + 1));
941				fdo = 0;
942				saw_eq = false;
943				/* must be 1/0 */
944				tilde_ok = (f & (DOTILDE | DOASNTILDE)) ? 1 : 0;
945				if (c == 0)
946					return;
947				Xinit(ds, dp, 128, ATEMP);
948			} else if (c == 0) {
949				return;
950			} else if (type == XSUB && ctype(c, C_IFS) &&
951			    !ctype(c, C_IFSWS) && Xlength(ds, dp) == 0) {
952				*(cp = alloc(1, ATEMP)) = '\0';
953				XPput(*wp, cp);
954				type = XSUBMID;
955			}
956			if (word != IFS_NWS)
957				word = ctype(c, C_IFSWS) ? IFS_WS : IFS_NWS;
958		} else {
959			if (type == XSUB) {
960				if (word == IFS_NWS &&
961				    Xlength(ds, dp) == 0) {
962					*(cp = alloc(1, ATEMP)) = '\0';
963					XPput(*wp, cp);
964				}
965				type = XSUBMID;
966			}
967
968			/* age tilde_ok info - ~ code tests second bit */
969			tilde_ok <<= 1;
970			/* mark any special second pass chars */
971			if (!quote)
972				switch (c) {
973				case '[':
974				case '!':
975				case '-':
976				case ']':
977					/*
978					 * For character classes - doesn't hurt
979					 * to have magic !,-,]s outside of
980					 * [...] expressions.
981					 */
982					if (f & (DOPAT | DOGLOB)) {
983						fdo |= DOMAGIC;
984						if (c == '[')
985							fdo |= f & DOGLOB;
986						*dp++ = MAGIC;
987					}
988					break;
989				case '*':
990				case '?':
991					if (f & (DOPAT | DOGLOB)) {
992						fdo |= DOMAGIC | (f & DOGLOB);
993						*dp++ = MAGIC;
994					}
995					break;
996				case '{':
997				case '}':
998				case ',':
999					if ((f & DOBRACE) && (c == '{' /*}*/ ||
1000					    (fdo & DOBRACE))) {
1001						fdo |= DOBRACE|DOMAGIC;
1002						*dp++ = MAGIC;
1003					}
1004					break;
1005				case '=':
1006					/* Note first unquoted = for ~ */
1007					if (!(f & DOTEMP) && (!Flag(FPOSIX) ||
1008					    (f & DOASNTILDE)) && !saw_eq) {
1009						saw_eq = true;
1010						tilde_ok = 1;
1011					}
1012					break;
1013				case ':':
1014					/* : */
1015					/* Note unquoted : for ~ */
1016					if (!(f & DOTEMP) && (f & DOASNTILDE))
1017						tilde_ok = 1;
1018					break;
1019				case '~':
1020					/*
1021					 * tilde_ok is reset whenever
1022					 * any of ' " $( $(( ${ } are seen.
1023					 * Note that tilde_ok must be preserved
1024					 * through the sequence ${A=a=}~
1025					 */
1026					if (type == XBASE &&
1027					    (f & (DOTILDE | DOASNTILDE)) &&
1028					    (tilde_ok & 2)) {
1029						const char *tcp;
1030						char *tdp = dp;
1031
1032						tcp = maybe_expand_tilde(sp,
1033						    &ds, &tdp,
1034						    tobool(f & DOASNTILDE));
1035						if (tcp) {
1036							if (dp != tdp)
1037								word = IFS_WORD;
1038							dp = tdp;
1039							sp = tcp;
1040							continue;
1041						}
1042					}
1043					break;
1044				}
1045			else
1046				/* undo temporary */
1047				quote &= ~2;
1048
1049			if (make_magic) {
1050				make_magic = false;
1051				fdo |= DOMAGIC | (f & DOGLOB);
1052				*dp++ = MAGIC;
1053			} else if (ISMAGIC(c)) {
1054				fdo |= DOMAGIC;
1055				*dp++ = MAGIC;
1056			}
1057			/* save output char */
1058			*dp++ = c;
1059			word = IFS_WORD;
1060		}
1061	}
1062}
1063
/*
 * Report whether the NULL-terminated string vector strv contains at
 * least one string that is not empty.
 */
static bool
hasnonempty(const char **strv)
{
	const char **p;

	for (p = strv; *p != NULL; p++) {
		if (**p != '\0')
			return (true);
	}
	return (false);
}
1074
1075/*
1076 * Prepare to generate the string returned by ${} substitution.
1077 */
1078static int
1079varsub(Expand *xp, const char *sp, const char *word,
1080    int *stypep,	/* becomes qualifier type */
1081    int *slenp)		/* " " len (=, :=, etc.) valid iff *stypep != 0 */
1082{
1083	int c;
1084	int state;	/* next state: XBASE, XARG, XSUB, XNULLSUB */
1085	int stype;	/* substitution type */
1086	int slen = 0;
1087	const char *p;
1088	struct tbl *vp;
1089	bool zero_ok = false;
1090
1091	if ((stype = sp[0]) == '\0')
1092		/* Bad variable name */
1093		return (-1);
1094
1095	xp->var = NULL;
1096
1097	/*-
1098	 * ${#var}, string length (-U: characters, +U: octets) or array size
1099	 * ${%var}, string width (-U: screen columns, +U: octets)
1100	 */
1101	c = sp[1];
1102	if (stype == '%' && c == '\0')
1103		return (-1);
1104	if ((stype == '#' || stype == '%') && c != '\0') {
1105		/* Can't have any modifiers for ${#...} or ${%...} */
1106		if (*word != CSUBST)
1107			return (-1);
1108		sp++;
1109		/* Check for size of array */
1110		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1111		    p[2] == ']') {
1112			int n = 0;
1113
1114			if (stype != '#')
1115				return (-1);
1116			vp = global(arrayname(sp));
1117			if (vp->flag & (ISSET|ARRAY))
1118				zero_ok = true;
1119			for (; vp; vp = vp->u.array)
1120				if (vp->flag & ISSET)
1121					n++;
1122			c = n;
1123		} else if (c == '*' || c == '@') {
1124			if (stype != '#')
1125				return (-1);
1126			c = e->loc->argc;
1127		} else {
1128			p = str_val(global(sp));
1129			zero_ok = p != null;
1130			if (stype == '#')
1131				c = utflen(p);
1132			else {
1133				/* partial utf_mbswidth reimplementation */
1134				const char *s = p;
1135				unsigned int wc;
1136				size_t len;
1137				int cw;
1138
1139				c = 0;
1140				while (*s) {
1141					if (!UTFMODE || (len = utf_mbtowc(&wc,
1142					    s)) == (size_t)-1)
1143						/* not UTFMODE or not UTF-8 */
1144						wc = (unsigned char)(*s++);
1145					else
1146						/* UTFMODE and UTF-8 */
1147						s += len;
1148					/* wc == char or wchar at s++ */
1149					if ((cw = utf_wcwidth(wc)) == -1) {
1150						/* 646, 8859-1, 10646 C0/C1 */
1151						c = -1;
1152						break;
1153					}
1154					c += cw;
1155				}
1156			}
1157		}
1158		if (Flag(FNOUNSET) && c == 0 && !zero_ok)
1159			errorf(Tf_parm, sp);
1160		/* unqualified variable/string substitution */
1161		*stypep = 0;
1162		xp->str = shf_smprintf(Tf_d, c);
1163		return (XSUB);
1164	}
1165	if (stype == '!' && c != '\0' && *word == CSUBST) {
1166		sp++;
1167		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1168		    p[2] == ']') {
1169			c = '!';
1170			stype = 0;
1171			goto arraynames;
1172		}
1173		xp->var = global(sp);
1174		xp->str = p ? shf_smprintf("%s[%lu]",
1175		    xp->var->name, arrayindex(xp->var)) : xp->var->name;
1176		*stypep = 0;
1177		return (XSUB);
1178	}
1179
1180	/* Check for qualifiers in word part */
1181	stype = 0;
1182	c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
1183	if (c == ':') {
1184		slen += 2;
1185		stype = 0x80;
1186		c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
1187	}
1188	if (!stype && c == '/') {
1189		slen += 2;
1190		stype = c;
1191		if (word[slen] == ADELIM && word[slen + 1] == c) {
1192			slen += 2;
1193			stype |= 0x80;
1194		}
1195	} else if (stype == 0x80 && (c == ' ' || c == '0')) {
1196		stype |= '0';
1197	} else if (ctype(c, C_SUBOP1)) {
1198		slen += 2;
1199		stype |= c;
1200	} else if (ctype(c, C_SUBOP2)) {
1201		/* Note: ksh88 allows :%, :%%, etc */
1202		slen += 2;
1203		stype = c;
1204		if (word[slen + 0] == CHAR && c == word[slen + 1]) {
1205			stype |= 0x80;
1206			slen += 2;
1207		}
1208	} else if (c == '@') {
1209		/* @x where x is command char */
1210		slen += 2;
1211		stype |= 0x100;
1212		if (word[slen] == CHAR) {
1213			stype |= word[slen + 1];
1214			slen += 2;
1215		}
1216	} else if (stype)
1217		/* : is not ok */
1218		return (-1);
1219	if (!stype && *word != CSUBST)
1220		return (-1);
1221
1222	c = sp[0];
1223	if (c == '*' || c == '@') {
1224		switch (stype & 0x17F) {
1225		/* can't assign to a vector */
1226		case '=':
1227		/* can't trim a vector (yet) */
1228		case '%':
1229		case '#':
1230		case '?':
1231		case '0':
1232		case 0x100 | '/':
1233		case '/':
1234		case 0x100 | '#':
1235		case 0x100 | 'Q':
1236			return (-1);
1237		}
1238		if (e->loc->argc == 0) {
1239			xp->str = null;
1240			xp->var = global(sp);
1241			state = c == '@' ? XNULLSUB : XSUB;
1242		} else {
1243			xp->u.strv = (const char **)e->loc->argv + 1;
1244			xp->str = *xp->u.strv++;
1245			/* $@ */
1246			xp->split = tobool(c == '@');
1247			state = XARG;
1248		}
1249		/* POSIX 2009? */
1250		zero_ok = true;
1251	} else if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
1252	    p[2] == ']') {
1253		XPtrV wv;
1254
1255		switch (stype & 0x17F) {
1256		/* can't assign to a vector */
1257		case '=':
1258		/* can't trim a vector (yet) */
1259		case '%':
1260		case '#':
1261		case '?':
1262		case '0':
1263		case 0x100 | '/':
1264		case '/':
1265		case 0x100 | '#':
1266		case 0x100 | 'Q':
1267			return (-1);
1268		}
1269		c = 0;
1270 arraynames:
1271		XPinit(wv, 32);
1272		vp = global(arrayname(sp));
1273		for (; vp; vp = vp->u.array) {
1274			if (!(vp->flag&ISSET))
1275				continue;
1276			XPput(wv, c == '!' ? shf_smprintf(Tf_lu,
1277			    arrayindex(vp)) :
1278			    str_val(vp));
1279		}
1280		if (XPsize(wv) == 0) {
1281			xp->str = null;
1282			state = p[1] == '@' ? XNULLSUB : XSUB;
1283			XPfree(wv);
1284		} else {
1285			XPput(wv, 0);
1286			xp->u.strv = (const char **)XPptrv(wv);
1287			xp->str = *xp->u.strv++;
1288			/* ${foo[@]} */
1289			xp->split = tobool(p[1] == '@');
1290			state = XARG;
1291		}
1292	} else {
1293		xp->var = global(sp);
1294		xp->str = str_val(xp->var);
1295		/* can't assign things like $! or $1 */
1296		if ((stype & 0x17F) == '=' && !*xp->str &&
1297		    ctype(*sp, C_VAR1 | C_DIGIT))
1298			return (-1);
1299		state = XSUB;
1300	}
1301
1302	c = stype & 0x7F;
1303	/* test the compiler's code generator */
1304	if (((stype < 0x100) && (ctype(c, C_SUBOP2) ||
1305	    (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
1306	    (state != XARG || (ifs0 || xp->split ?
1307	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
1308	    c == '=' || c == '-' || c == '?' : c == '+'))) ||
1309	    stype == (0x80 | '0') || stype == (0x100 | '#') ||
1310	    stype == (0x100 | 'Q') || (stype & 0x7F) == '/')
1311		/* expand word instead of variable value */
1312		state = XBASE;
1313	if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
1314	    (ctype(c, C_SUBOP2) || (state != XBASE && c != '+')))
1315		errorf(Tf_parm, sp);
1316	*stypep = stype;
1317	*slenp = slen;
1318	return (state);
1319}
1320
1321/*
1322 * Run the command in $(...) and read its output.
1323 */
1324static int
1325comsub(Expand *xp, const char *cp, int fn MKSH_A_UNUSED)
1326{
1327	Source *s, *sold;
1328	struct op *t;
1329	struct shf *shf;
1330	uint8_t old_utfmode = UTFMODE;
1331
1332	s = pushs(SSTRING, ATEMP);
1333	s->start = s->str = cp;
1334	sold = source;
1335	t = compile(s, true);
1336	afree(s, ATEMP);
1337	source = sold;
1338
1339	UTFMODE = old_utfmode;
1340
1341	if (t == NULL)
1342		return (XBASE);
1343
1344	/* no waitlast() unless specifically enabled later */
1345	xp->split = false;
1346
1347	if (t->type == TCOM &&
1348	    *t->args == NULL && *t->vars == NULL && t->ioact != NULL) {
1349		/* $(<file) */
1350		struct ioword *io = *t->ioact;
1351		char *name;
1352
1353		switch (io->ioflag & IOTYPE) {
1354		case IOREAD:
1355			shf = shf_open(name = evalstr(io->ioname, DOTILDE),
1356				O_RDONLY, 0, SHF_MAPHI | SHF_CLEXEC);
1357			if (shf == NULL)
1358				warningf(!Flag(FTALKING), Tf_sD_s_sD_s,
1359				    name, Tcant_open, "$(<...) input",
1360				    cstrerror(errno));
1361			break;
1362		case IOHERE:
1363			if (!herein(io, &name)) {
1364				xp->str = name;
1365				/* as $(…) requires, trim trailing newlines */
1366				name += strlen(name);
1367				while (name > xp->str && name[-1] == '\n')
1368					--name;
1369				*name = '\0';
1370				return (XSUB);
1371			}
1372			shf = NULL;
1373			break;
1374		default:
1375			errorf(Tf_sD_s, T_funny_command,
1376			    snptreef(NULL, 32, Tft_R, io));
1377		}
1378	} else if (fn == FUNSUB) {
1379		int ofd1;
1380		struct temp *tf = NULL;
1381
1382		/*
1383		 * create a temporary file, open for reading and writing,
1384		 * with an shf open for reading (buffered) but yet unused
1385		 */
1386		maketemp(ATEMP, TT_FUNSUB, &tf);
1387		if (!tf->shf) {
1388			errorf(Tf_temp,
1389			    Tcreate, tf->tffn, cstrerror(errno));
1390		}
1391		/* extract shf from temporary file, unlink and free it */
1392		shf = tf->shf;
1393		unlink(tf->tffn);
1394		afree(tf, ATEMP);
1395		/* save stdout and let it point to the tempfile */
1396		ofd1 = savefd(1);
1397		ksh_dup2(shf_fileno(shf), 1, false);
1398		/*
1399		 * run tree, with output thrown into the tempfile,
1400		 * in a new function block
1401		 */
1402		valsub(t, NULL);
1403		subst_exstat = exstat & 0xFF;
1404		/* rewind the tempfile and restore regular stdout */
1405		lseek(shf_fileno(shf), (off_t)0, SEEK_SET);
1406		restfd(1, ofd1);
1407	} else if (fn == VALSUB) {
1408		xp->str = valsub(t, ATEMP);
1409		subst_exstat = exstat & 0xFF;
1410		return (XSUB);
1411	} else {
1412		int ofd1, pv[2];
1413
1414		openpipe(pv);
1415		shf = shf_fdopen(pv[0], SHF_RD, NULL);
1416		ofd1 = savefd(1);
1417		if (pv[1] != 1) {
1418			ksh_dup2(pv[1], 1, false);
1419			close(pv[1]);
1420		}
1421		execute(t, XXCOM | XPIPEO | XFORK, NULL);
1422		restfd(1, ofd1);
1423		startlast();
1424		/* waitlast() */
1425		xp->split = true;
1426	}
1427
1428	xp->u.shf = shf;
1429	return (XCOM);
1430}
1431
1432/*
1433 * perform #pattern and %pattern substitution in ${}
1434 */
1435static char *
1436trimsub(char *str, char *pat, int how)
1437{
1438	char *end = strnul(str);
1439	char *p, c;
1440
1441	switch (how & 0xFF) {
1442	case '#':
1443		/* shortest match at beginning */
1444		for (p = str; p <= end; p += utf_ptradj(p)) {
1445			c = *p; *p = '\0';
1446			if (gmatchx(str, pat, false)) {
1447				record_match(str);
1448				*p = c;
1449				return (p);
1450			}
1451			*p = c;
1452		}
1453		break;
1454	case '#'|0x80:
1455		/* longest match at beginning */
1456		for (p = end; p >= str; p--) {
1457			c = *p; *p = '\0';
1458			if (gmatchx(str, pat, false)) {
1459				record_match(str);
1460				*p = c;
1461				return (p);
1462			}
1463			*p = c;
1464		}
1465		break;
1466	case '%':
1467		/* shortest match at end */
1468		p = end;
1469		while (p >= str) {
1470			if (gmatchx(p, pat, false))
1471				goto trimsub_match;
1472			if (UTFMODE) {
1473				char *op = p;
1474				while ((p-- > str) && ((*p & 0xC0) == 0x80))
1475					;
1476				if ((p < str) || (p + utf_ptradj(p) != op))
1477					p = op - 1;
1478			} else
1479				--p;
1480		}
1481		break;
1482	case '%'|0x80:
1483		/* longest match at end */
1484		for (p = str; p <= end; p++)
1485			if (gmatchx(p, pat, false)) {
1486 trimsub_match:
1487				record_match(p);
1488				strndupx(end, str, p - str, ATEMP);
1489				return (end);
1490			}
1491		break;
1492	}
1493
1494	/* no match, return string */
1495	return (str);
1496}
1497
1498/*
1499 * glob
1500 * Name derived from V6's /etc/glob, the program that expanded filenames.
1501 */
1502
1503/* XXX cp not const 'cause slashes are temporarily replaced with NULs... */
1504static void
1505glob(char *cp, XPtrV *wp, bool markdirs)
1506{
1507	int oldsize = XPsize(*wp);
1508
1509	if (glob_str(cp, wp, markdirs) == 0)
1510		XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
1511	else
1512		qsort(XPptrv(*wp) + oldsize, XPsize(*wp) - oldsize,
1513		    sizeof(void *), xstrcmp);
1514}
1515
/* flag bits passed to globit() in its check argument */
#define GF_NONE		0		/* no flags set */
#define GF_EXCHECK	BIT(0)		/* do existence check on file */
#define GF_GLOBBED	BIT(1)		/* some globbing has been done */
#define GF_MARKDIR	BIT(2)		/* add trailing / to directories */
1520
1521/*
1522 * Apply file globbing to cp and store the matching files in wp. Returns
1523 * the number of matches found.
1524 */
1525int
1526glob_str(char *cp, XPtrV *wp, bool markdirs)
1527{
1528	int oldsize = XPsize(*wp);
1529	XString xs;
1530	char *xp;
1531
1532	Xinit(xs, xp, 256, ATEMP);
1533	globit(&xs, &xp, cp, wp, markdirs ? GF_MARKDIR : GF_NONE);
1534	Xfree(xs, xp);
1535
1536	return (XPsize(*wp) - oldsize);
1537}
1538
1539static void
1540globit(XString *xs,	/* dest string */
1541    char **xpp,		/* ptr to dest end */
1542    char *sp,		/* source path */
1543    XPtrV *wp,		/* output list */
1544    int check)		/* GF_* flags */
1545{
1546	char *np;		/* next source component */
1547	char *xp = *xpp;
1548	char *se;
1549	char odirsep;
1550
1551	/* This to allow long expansions to be interrupted */
1552	intrcheck();
1553
1554	if (sp == NULL) {
1555		/* end of source path */
1556		/*
1557		 * We only need to check if the file exists if a pattern
1558		 * is followed by a non-pattern (eg, foo*x/bar; no check
1559		 * is needed for foo* since the match must exist) or if
1560		 * any patterns were expanded and the markdirs option is set.
1561		 * Symlinks make things a bit tricky...
1562		 */
1563		if ((check & GF_EXCHECK) ||
1564		    ((check & GF_MARKDIR) && (check & GF_GLOBBED))) {
1565#define stat_check()	(stat_done ? stat_done : (stat_done = \
1566			    stat(Xstring(*xs, xp), &statb) < 0 ? -1 : 1))
1567			struct stat lstatb, statb;
1568			/* -1: failed, 1 ok, 0 not yet done */
1569			int stat_done = 0;
1570
1571			if (mksh_lstat(Xstring(*xs, xp), &lstatb) < 0)
1572				return;
1573			/*
1574			 * special case for systems which strip trailing
1575			 * slashes from regular files (eg, /etc/passwd/).
1576			 * SunOS 4.1.3 does this...
1577			 */
1578			if ((check & GF_EXCHECK) && xp > Xstring(*xs, xp) &&
1579			    mksh_cdirsep(xp[-1]) && !S_ISDIR(lstatb.st_mode) &&
1580			    (!S_ISLNK(lstatb.st_mode) ||
1581			    stat_check() < 0 || !S_ISDIR(statb.st_mode)))
1582				return;
1583			/*
1584			 * Possibly tack on a trailing / if there isn't already
1585			 * one and if the file is a directory or a symlink to a
1586			 * directory
1587			 */
1588			if (((check & GF_MARKDIR) && (check & GF_GLOBBED)) &&
1589			    xp > Xstring(*xs, xp) && !mksh_cdirsep(xp[-1]) &&
1590			    (S_ISDIR(lstatb.st_mode) ||
1591			    (S_ISLNK(lstatb.st_mode) && stat_check() > 0 &&
1592			    S_ISDIR(statb.st_mode)))) {
1593				*xp++ = '/';
1594				*xp = '\0';
1595			}
1596		}
1597		strndupx(np, Xstring(*xs, xp), Xlength(*xs, xp), ATEMP);
1598		XPput(*wp, np);
1599		return;
1600	}
1601
1602	if (xp > Xstring(*xs, xp))
1603		*xp++ = '/';
1604	while (mksh_cdirsep(*sp)) {
1605		Xcheck(*xs, xp);
1606		*xp++ = *sp++;
1607	}
1608	np = mksh_sdirsep(sp);
1609	if (np != NULL) {
1610		se = np;
1611		/* don't assume '/', can be multiple kinds */
1612		odirsep = *np;
1613		*np++ = '\0';
1614	} else {
1615		odirsep = '\0'; /* keep gcc quiet */
1616		se = sp + strlen(sp);
1617	}
1618
1619
1620	/*
1621	 * Check if sp needs globbing - done to avoid pattern checks for strings
1622	 * containing MAGIC characters, open [s without the matching close ],
1623	 * etc. (otherwise opendir() will be called which may fail because the
1624	 * directory isn't readable - if no globbing is needed, only execute
1625	 * permission should be required (as per POSIX)).
1626	 */
1627	if (!has_globbing(sp, se)) {
1628		XcheckN(*xs, xp, se - sp + 1);
1629		debunk(xp, sp, Xnleft(*xs, xp));
1630		xp += strlen(xp);
1631		*xpp = xp;
1632		globit(xs, xpp, np, wp, check);
1633	} else {
1634		DIR *dirp;
1635		struct dirent *d;
1636		char *name;
1637		size_t len, prefix_len;
1638
1639		/* xp = *xpp;	copy_non_glob() may have re-alloc'd xs */
1640		*xp = '\0';
1641		prefix_len = Xlength(*xs, xp);
1642		dirp = opendir(prefix_len ? Xstring(*xs, xp) : Tdot);
1643		if (dirp == NULL)
1644			goto Nodir;
1645		while ((d = readdir(dirp)) != NULL) {
1646			name = d->d_name;
1647			if (name[0] == '.' &&
1648			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1649				/* always ignore . and .. */
1650				continue;
1651			if ((*name == '.' && *sp != '.') ||
1652			    !gmatchx(name, sp, true))
1653				continue;
1654
1655			len = strlen(d->d_name) + 1;
1656			XcheckN(*xs, xp, len);
1657			memcpy(xp, name, len);
1658			*xpp = xp + len - 1;
1659			globit(xs, xpp, np, wp,
1660				(check & GF_MARKDIR) | GF_GLOBBED
1661				| (np ? GF_EXCHECK : GF_NONE));
1662			xp = Xstring(*xs, xp) + prefix_len;
1663		}
1664		closedir(dirp);
1665 Nodir:
1666		;
1667	}
1668
1669	if (np != NULL)
1670		*--np = odirsep;
1671}
1672
1673/* remove MAGIC from string */
1674char *
1675debunk(char *dp, const char *sp, size_t dlen)
1676{
1677	char *d;
1678	const char *s;
1679
1680	if ((s = cstrchr(sp, MAGIC))) {
1681		if (s - sp >= (ssize_t)dlen)
1682			return (dp);
1683		memmove(dp, sp, s - sp);
1684		for (d = dp + (s - sp); *s && (d - dp < (ssize_t)dlen); s++)
1685			if (!ISMAGIC(*s) || !(*++s & 0x80) ||
1686			    !vstrchr("*+?@! ", *s & 0x7f))
1687				*d++ = *s;
1688			else {
1689				/* extended pattern operators: *+?@! */
1690				if ((*s & 0x7f) != ' ')
1691					*d++ = *s & 0x7f;
1692				if (d - dp < (ssize_t)dlen)
1693					*d++ = '(';
1694			}
1695		*d = '\0';
1696	} else if (dp != sp)
1697		strlcpy(dp, sp, dlen);
1698	return (dp);
1699}
1700
1701/*
1702 * Check if p is an unquoted name, possibly followed by a / or :. If so
1703 * puts the expanded version in *dcp,dp and returns a pointer in p just
1704 * past the name, otherwise returns 0.
1705 */
1706static const char *
1707maybe_expand_tilde(const char *p, XString *dsp, char **dpp, bool isassign)
1708{
1709	XString ts;
1710	char *dp = *dpp;
1711	char *tp;
1712	const char *r;
1713
1714	Xinit(ts, tp, 16, ATEMP);
1715	/* : only for DOASNTILDE form */
1716	while (p[0] == CHAR && !mksh_cdirsep(p[1]) &&
1717	    (!isassign || p[1] != ':')) {
1718		Xcheck(ts, tp);
1719		*tp++ = p[1];
1720		p += 2;
1721	}
1722	*tp = '\0';
1723	r = (p[0] == EOS || p[0] == CHAR || p[0] == CSUBST) ?
1724	    do_tilde(Xstring(ts, tp)) : NULL;
1725	Xfree(ts, tp);
1726	if (r) {
1727		while (*r) {
1728			Xcheck(*dsp, dp);
1729			if (ISMAGIC(*r))
1730				*dp++ = MAGIC;
1731			*dp++ = *r++;
1732		}
1733		*dpp = dp;
1734		r = p;
1735	}
1736	return (r);
1737}
1738
1739/*
1740 * tilde expansion
1741 *
1742 * based on a version by Arnold Robbins
1743 */
1744char *
1745do_tilde(char *cp)
1746{
1747	char *dp = null;
1748#ifndef MKSH_NOPWNAM
1749	bool do_simplify = true;
1750#endif
1751
1752	if (cp[0] == '\0')
1753		dp = str_val(global("HOME"));
1754	else if (cp[0] == '+' && cp[1] == '\0')
1755		dp = str_val(global(TPWD));
1756	else if (ksh_isdash(cp))
1757		dp = str_val(global(TOLDPWD));
1758#ifndef MKSH_NOPWNAM
1759	else {
1760		dp = homedir(cp);
1761		do_simplify = false;
1762	}
1763#endif
1764
1765	/* if parameters aren't set, don't expand ~ */
1766	if (dp == NULL || dp == null)
1767		return (NULL);
1768
1769	/* simplify parameters as if cwd upon entry */
1770#ifndef MKSH_NOPWNAM
1771	if (do_simplify)
1772#endif
1773	  {
1774		strdupx(dp, dp, ATEMP);
1775		simplify_path(dp);
1776	}
1777	return (dp);
1778}
1779
1780#ifndef MKSH_NOPWNAM
1781/*
1782 * map userid to user's home directory.
1783 * note that 4.3's getpw adds more than 6K to the shell,
1784 * and the YP version probably adds much more.
1785 * we might consider our own version of getpwnam() to keep the size down.
1786 */
1787static char *
1788homedir(char *name)
1789{
1790	struct tbl *ap;
1791
1792	ap = ktenter(&homedirs, name, hash(name));
1793	if (!(ap->flag & ISSET)) {
1794		struct passwd *pw;
1795
1796		pw = getpwnam(name);
1797		if (pw == NULL)
1798			return (NULL);
1799		strdupx(ap->val.s, pw->pw_dir, APERM);
1800		ap->flag |= DEFINED|ISSET|ALLOC;
1801	}
1802	return (ap->val.s);
1803}
1804#endif
1805
1806static void
1807alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
1808{
1809	unsigned int count = 0;
1810	char *brace_start, *brace_end, *comma = NULL;
1811	char *field_start;
1812	char *p = exp_start;
1813
1814	/* search for open brace */
1815	while ((p = strchr(p, MAGIC)) && p[1] != '{' /*}*/)
1816		p += 2;
1817	brace_start = p;
1818
1819	/* find matching close brace, if any */
1820	if (p) {
1821		comma = NULL;
1822		count = 1;
1823		p += 2;
1824		while (*p && count) {
1825			if (ISMAGIC(*p++)) {
1826				if (*p == '{' /*}*/)
1827					++count;
1828				else if (*p == /*{*/ '}')
1829					--count;
1830				else if (*p == ',' && count == 1)
1831					comma = p;
1832				++p;
1833			}
1834		}
1835	}
1836	/* no valid expansions... */
1837	if (!p || count != 0) {
1838		/*
1839		 * Note that given a{{b,c} we do not expand anything (this is
1840		 * what AT&T ksh does. This may be changed to do the {b,c}
1841		 * expansion. }
1842		 */
1843		if (fdo & DOGLOB)
1844			glob(start, wp, tobool(fdo & DOMARKDIRS));
1845		else
1846			XPput(*wp, debunk(start, start, end - start));
1847		return;
1848	}
1849	brace_end = p;
1850	if (!comma) {
1851		alt_expand(wp, start, brace_end, end, fdo);
1852		return;
1853	}
1854
1855	/* expand expression */
1856	field_start = brace_start + 2;
1857	count = 1;
1858	for (p = brace_start + 2; p != brace_end; p++) {
1859		if (ISMAGIC(*p)) {
1860			if (*++p == '{' /*}*/)
1861				++count;
1862			else if ((*p == /*{*/ '}' && --count == 0) ||
1863			    (*p == ',' && count == 1)) {
1864				char *news;
1865				int l1, l2, l3;
1866
1867				/*
1868				 * addition safe since these operate on
1869				 * one string (separate substrings)
1870				 */
1871				l1 = brace_start - start;
1872				l2 = (p - 1) - field_start;
1873				l3 = end - brace_end;
1874				news = alloc(l1 + l2 + l3 + 1, ATEMP);
1875				memcpy(news, start, l1);
1876				memcpy(news + l1, field_start, l2);
1877				memcpy(news + l1 + l2, brace_end, l3);
1878				news[l1 + l2 + l3] = '\0';
1879				alt_expand(wp, news, news + l1,
1880				    news + l1 + l2 + l3, fdo);
1881				field_start = p + 1;
1882			}
1883		}
1884	}
1885	return;
1886}
1887
1888/* helper function due to setjmp/longjmp woes */
1889static char *
1890valsub(struct op *t, Area *ap)
1891{
1892	char * volatile cp = NULL;
1893	struct tbl * volatile vp = NULL;
1894
1895	newenv(E_FUNC);
1896	newblock();
1897	if (ap)
1898		vp = local("REPLY", false);
1899	if (!kshsetjmp(e->jbuf))
1900		execute(t, XXCOM | XERROK, NULL);
1901	if (vp)
1902		strdupx(cp, str_val(vp), ap);
1903	quitenv(NULL);
1904
1905	return (cp);
1906}
1907