1cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*	$NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $	*/
2cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
34fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*-
44fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Copyright (c) 1992, 1993, 1994
54fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *	The Regents of the University of California.  All rights reserved.
64fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
74fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This code is derived from software contributed to Berkeley by
84fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Henry Spencer.
94fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Redistribution and use in source and binary forms, with or without
114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * modification, are permitted provided that the following conditions
124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * are met:
134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 1. Redistributions of source code must retain the above copyright
144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *    notice, this list of conditions and the following disclaimer.
154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 2. Redistributions in binary form must reproduce the above copyright
164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *    notice, this list of conditions and the following disclaimer in the
174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *    documentation and/or other materials provided with the distribution.
184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 3. Neither the name of the University nor the names of its contributors
194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *    may be used to endorse or promote products derived from this software
204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *    without specific prior written permission.
214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * SUCH DAMAGE.
334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
37cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*-
38cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Copyright (c) 1992, 1993, 1994 Henry Spencer.
39cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *
40cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * This code is derived from software contributed to Berkeley by
41cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Henry Spencer.
42cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *
43cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Redistribution and use in source and binary forms, with or without
44cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * modification, are permitted provided that the following conditions
45cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * are met:
46cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 1. Redistributions of source code must retain the above copyright
47cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    notice, this list of conditions and the following disclaimer.
48cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 2. Redistributions in binary form must reproduce the above copyright
49cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    notice, this list of conditions and the following disclaimer in the
50cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    documentation and/or other materials provided with the distribution.
51cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 3. All advertising materials mentioning features or use of this software
52cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    must display the following acknowledgement:
53cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *	This product includes software developed by the University of
54cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *	California, Berkeley and its contributors.
55cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 4. Neither the name of the University nor the names of its contributors
56cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    may be used to endorse or promote products derived from this software
57cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *    without specific prior written permission.
58cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *
59cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * SUCH DAMAGE.
70cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *
71cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
72cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */
73cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
74cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <sys/cdefs.h>
75cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if defined(LIBC_SCCS) && !defined(lint)
76cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0
77cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
78cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#else
79cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes__RCSID("$NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $");
80cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
81cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif /* LIBC_SCCS and not lint */
82cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
83cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include "namespace.h"
844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <sys/types.h>
85cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
86cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <assert.h>
874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <ctype.h>
884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <limits.h>
89cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <stdio.h>
904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <stdlib.h>
91cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <string.h>
924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <regex.h>
934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
94cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __weak_alias
95cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes__weak_alias(regcomp,_regcomp)
96cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
97cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "utils.h"
994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "regex2.h"
1004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "cclass.h"
1024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "cname.h"
1034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
1054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * parse structure, passed up and down to avoid global variables and
1064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * other clumsinesses
1074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
1084fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstruct parse {
109cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *next;	/* next character in RE */
110cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *end;	/* end of string (-> NUL normally) */
1114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int error;		/* has an error been seen? */
1124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop *strip;		/* malloced strip */
1134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno ssize;		/* malloced strip size (allocated) */
1144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno slen;		/* malloced strip length (used) */
115cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t ncsalloc;	/* number of csets allocated */
1164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	struct re_guts *g;
1174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
1184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
1194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno pend[NPAREN];	/* -> ) ([0] unused) */
1204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross};
1214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
122cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* ========= begin header generated by ./mkh ========= */
123cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __cplusplus
124cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesextern "C" {
125cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
126cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
127cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* === regcomp.c === */
128cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_ere(struct parse *p, int stop, size_t reclimit);
129cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_ere_exp(struct parse *p, size_t reclimit);
130cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_str(struct parse *p);
131cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
132cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
133cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int p_count(struct parse *p);
134cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_bracket(struct parse *p);
135cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_term(struct parse *p, cset *cs);
136cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_cclass(struct parse *p, cset *cs);
137cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_eclass(struct parse *p, cset *cs);
138cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char p_b_symbol(struct parse *p);
139cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char p_b_coll_elem(struct parse *p, int endc);
140cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int othercase(int ch);
141cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void bothcases(struct parse *p, int ch);
142cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void ordinary(struct parse *p, int ch);
143cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void nonnewline(struct parse *p);
144cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
145cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int seterr(struct parse *p, int e);
146cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic cset *allocset(struct parse *p);
147cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void freeset(struct parse *p, cset *cs);
148cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno freezeset(struct parse *p, cset *cs);
149cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int firstch(struct parse *p, cset *cs);
150cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int nch(struct parse *p, cset *cs);
151cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcadd(struct parse *p, cset *cs, const char *cp);
152cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0
153cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcsub(cset *cs, char *cp);
154cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int mcin(cset *cs, char *cp);
155cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char *mcfind(cset *cs, char *cp);
156cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
157cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcinvert(struct parse *p, cset *cs);
158cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mccase(struct parse *p, cset *cs);
159cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int isinsets(struct re_guts *g, int c);
160cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int samesets(struct re_guts *g, int c1, int c2);
161cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void categorize(struct parse *p, struct re_guts *g);
162cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno dupl(struct parse *p, sopno start, sopno finish);
163cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void doemit(struct parse *p, sop op, sopno opnd);
164cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void doinsert(struct parse *p, sop op, sopno opnd, sopno pos);
165cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void dofwd(struct parse *p, sopno pos, sopno value);
166cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int enlarge(struct parse *p, sopno size);
167cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void stripsnug(struct parse *p, struct re_guts *g);
168cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void findmust(struct parse *p, struct re_guts *g);
169cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno pluscount(struct parse *p, struct re_guts *g);
170cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
171cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __cplusplus
172cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes}
173cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
174cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* ========= end header generated by ./mkh ========= */
1754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1764fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char nuls[10];		/* place to point scanner in event of error */
1774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
1794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * macros for use with parse structure
1804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * BEWARE:  these know that the parse structure is named `p' !!!
1814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
1824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	PEEK()	(*p->next)
1834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	PEEK2()	(*(p->next+1))
1844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	MORE()	(p->next < p->end)
1854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	MORE2()	(p->next+1 < p->end)
1864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	SEE(c)	(MORE() && PEEK() == (c))
1874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	SEETWO(a, b)	(MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
1884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	EAT(c)	((SEE(c)) ? (NEXT(), 1) : 0)
1894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	EATTWO(a, b)	((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
1904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	NEXT()	(p->next++)
1914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	NEXT2()	(p->next += 2)
1924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	NEXTn(n)	(p->next += (n))
1934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	GETNEXT()	(*p->next++)
1944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	SETERROR(e)	seterr(p, (e))
195cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define	REQUIRE(co, e)	(void) ((co) || SETERROR(e))
1964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	MUSTSEE(c, e)	(REQUIRE(MORE() && PEEK() == (c), e))
197cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define	MUSTEAT(c, e)	(void) (REQUIRE(MORE() && GETNEXT() == (c), e))
1984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	MUSTNOTSEE(c, e)	(REQUIRE(!MORE() || PEEK() != (c), e))
199cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define	EMIT(op, sopnd)	doemit(p, (sop)(op), sopnd)
2004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	INSERT(op, pos)	doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
2014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	AHEAD(pos)		dofwd(p, pos, HERE()-(pos))
2024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	ASTERN(sop, pos)	EMIT(sop, HERE()-pos)
2034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	HERE()		(p->slen)
2044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	THERE()		(p->slen - 1)
2054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	THERETHERE()	(p->slen - 2)
2064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	DROP(n)	(p->slen -= (n))
2074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
2084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef NDEBUG
2094fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int never = 0;		/* for use in asserts; shuts lint up */
2104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#else
2114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define	never	0		/* some <assert.h>s have bugs too */
2124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif
2134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
214cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define	MEMLIMIT	0x8000000
215cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define MEMSIZE(p) \
216cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \
217cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	(p)->ncsalloc * sizeof(cset) + \
218cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	(p)->ssize * sizeof(sop))
219cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define	RECLIMIT	256
220cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
2214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
2224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - regcomp - interface for parser and compilation
223cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = extern int regcomp(regex_t *, const char *, int);
224cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_BASIC	0000
225cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_EXTENDED	0001
226cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_ICASE	0002
227cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_NOSUB	0004
228cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_NEWLINE	0010
229cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_NOSPEC	0020
230cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_PEND	0040
231cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define	REG_DUMP	0200
2324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
2334fa7b105644222d9b35347c9d226ca8e011072ebColin Crossint				/* 0 success, otherwise REG_something */
234cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesregcomp(
235cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    regex_t *preg,
236cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    const char *pattern,
237cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int cflags)
2384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
2394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	struct parse pa;
2404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	struct re_guts *g;
2414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	struct parse *p = &pa;
2424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int i;
2434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	size_t len;
2444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifdef REDEBUG
2454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	GOODFLAGS(f)	(f)
2464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#else
2474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	GOODFLAGS(f)	((f)&~REG_DUMP)
2484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif
2494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
250cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(preg != NULL);
251cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(pattern != NULL);
252cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
2534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cflags = GOODFLAGS(cflags);
2544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
2554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(REG_INVARG);
2564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
2574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (cflags&REG_PEND) {
2584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (preg->re_endp < pattern)
2594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			return(REG_INVARG);
2604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		len = preg->re_endp - pattern;
2614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	} else
262cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		len = strlen(pattern);
2634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
2644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* do the mallocs early so failure handling is easy */
2654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g = (struct re_guts *)malloc(sizeof(struct re_guts) +
2664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross							(NC-1)*sizeof(cat_t));
2674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (g == NULL)
2684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(REG_ESPACE);
2694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
270cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	p->strip = malloc(p->ssize * sizeof(sop));
2714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->slen = 0;
2724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->strip == NULL) {
273cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		free(g);
2744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(REG_ESPACE);
2754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
2764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
2774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* set things up */
2784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->g = g;
279cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	p->next = pattern;
2804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = p->next + len;
2814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->error = 0;
2824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->ncsalloc = 0;
2834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (i = 0; i < NPAREN; i++) {
2844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->pbegin[i] = 0;
2854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->pend[i] = 0;
2864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
2874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->csetsize = NC;
2884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->sets = NULL;
2894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->setbits = NULL;
2904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->ncsets = 0;
2914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->cflags = cflags;
2924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->iflags = 0;
2934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->nbol = 0;
2944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->neol = 0;
2954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->must = NULL;
2964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->mlen = 0;
2974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->nsub = 0;
2984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->ncategories = 1;	/* category 0 is "everything else" */
2994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->categories = &g->catspace[-(CHAR_MIN)];
3004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
3014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->backrefs = 0;
3024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* do it */
3044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	EMIT(OEND, 0);
3054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->firststate = THERE();
3064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (cflags&REG_EXTENDED)
307cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p_ere(p, OUT, 0);
3084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else if (cflags&REG_NOSPEC)
3094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_str(p);
3104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else
311cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p_bre(p, OUT, OUT, 0);
3124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	EMIT(OEND, 0);
3134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->laststate = THERE();
3144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* tidy up loose ends and fill things in */
3164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	categorize(p, g);
3174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	stripsnug(p, g);
3184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	findmust(p, g);
3194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->nplus = pluscount(p, g);
3204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->magic = MAGIC2;
3214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	preg->re_nsub = g->nsub;
3224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	preg->re_g = g;
3234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	preg->re_magic = MAGIC1;
3244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef REDEBUG
3254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* not debugging, so can't rely on the assert() in regexec() */
3264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (g->iflags&BAD)
3274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ASSERT);
3284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif
3294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* win or lose, we're done */
3314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)	/* lose */
3324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		regfree(preg);
3334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(p->error);
3344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
3354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
3374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_ere - ERE parser top level, concatenation and alternation
338cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_ere(struct parse *p, int stop, size_t reclimit);
3394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
3404fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
341cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_ere(
342cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
343cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int stop,			/* character this ERE should end at */
344cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    size_t reclimit)
3454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
3464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char c;
347cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno prevback = 0;	/* pacify gcc */
348cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno prevfwd = 0; 	/* pacify gcc */
3494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno conc;
3504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int first = 1;		/* is this the first alternative? */
3514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
352cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
353cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
354cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
355cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p->error = REG_ESPACE;
356cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return;
357cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	}
358cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
3594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (;;) {
3604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* do a bunch of concatenated expressions */
3614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		conc = HERE();
3624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		while (MORE() && (c = PEEK()) != '|' && c != stop)
363cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p_ere_exp(p, reclimit);
3644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
3654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (!EAT('|'))
3674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;		/* NOTE BREAK OUT */
3684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (first) {
3704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			INSERT(OCH_, conc);	/* offset is wrong */
3714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			prevfwd = conc;
3724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			prevback = conc;
3734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			first = 0;
3744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
3754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(OOR1, prevback);
3764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		prevback = THERE();
3774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(prevfwd);			/* fix previous offset */
3784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		prevfwd = HERE();
3794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OOR2, 0);			/* offset is very wrong */
3804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
3814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!first) {		/* tail-end fixups */
3834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(prevfwd);
3844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_CH, prevback);
3854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
3864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(!MORE() || SEE(stop));
3884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
3894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
3904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
3914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
392cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_ere_exp(struct parse *p, size_t reclimit);
3934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
3944fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
395cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_ere_exp(
396cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
397cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    size_t reclimit)
3984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
3994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char c;
4004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno pos;
4014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int count;
4024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int count2;
4034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno subno;
4044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int wascaret = 0;
4054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
406cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
407cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
4084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(MORE());		/* caller should have ensured this */
4094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	c = GETNEXT();
4104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
4114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	pos = HERE();
4124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch (c) {
4134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '(':
4144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EPAREN);
4154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->nsub++;
4164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		subno = p->g->nsub;
4174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (subno < NPAREN)
4184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pbegin[subno] = HERE();
4194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OLPAREN, subno);
4204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (!SEE(')'))
421cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p_ere(p, ')', reclimit);
4224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (subno < NPAREN) {
4234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pend[subno] = HERE();
4244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(p->pend[subno] != 0);
4254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
4264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(ORPAREN, subno);
4274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		MUSTEAT(')', REG_EPAREN);
4284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef POSIX_MISTAKE
4304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case ')':		/* happens only if no current unmatched ( */
4314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/*
4324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 * You may ask, why the ifndef?  Because I didn't notice
4334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 * this until slightly too late for 1003.2, and none of the
4344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 * other 1003.2 regular-expression reviewers noticed it at
4354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 * all.  So an unmatched ) is legal POSIX, at least until
4364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 * we can get it fixed.
4374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		 */
4384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_EPAREN);
4394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif
4414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '^':
4424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OBOL, 0);
4434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->iflags |= USEBOL;
4444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->nbol++;
4454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		wascaret = 1;
4464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '$':
4484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OEOL, 0);
4494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->iflags |= USEEOL;
4504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->neol++;
4514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '|':
4534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_EMPTY);
4544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '*':
4564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '+':
4574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '?':
4584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_BADRPT);
4594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '.':
4614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->g->cflags&REG_NEWLINE)
4624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			nonnewline(p);
4634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		else
4644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			EMIT(OANY, 0);
4654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '[':
4674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_bracket(p);
4684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '\\':
4704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EESCAPE);
4714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		c = GETNEXT();
4724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ordinary(p, c);
4734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '{':		/* okay as ordinary except if digit follows */
475cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT);
4764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* FALLTHROUGH */
4774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	default:
4784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ordinary(p, c);
4794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
4804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
4814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
4824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!MORE())
4834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
4844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	c = PEEK();
4854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* we call { a repetition if followed by a digit */
4864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!( c == '*' || c == '+' || c == '?' ||
487cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ))
4884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;		/* no repetition, we're done */
4894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	NEXT();
4904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
4914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(!wascaret, REG_BADRPT);
4924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch (c) {
4934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '*':	/* implemented as +? */
4944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* this case does not require the (y|) trick, noKLUDGE */
4954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OPLUS_, pos);
4964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_PLUS, pos);
4974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OQUEST_, pos);
4984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_QUEST, pos);
4994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
5004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '+':
5014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OPLUS_, pos);
5024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_PLUS, pos);
5034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
5044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '?':
5054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
5064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OCH_, pos);		/* offset slightly wrong */
5074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(OOR1, pos);		/* this one's right */
5084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(pos);			/* fix the OCH_ */
5094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OOR2, 0);			/* offset very wrong... */
5104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(THERE());			/* ...so fix it */
5114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_CH, THERETHERE());
5124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
5134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '{':
5144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		count = p_count(p);
5154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (EAT(',')) {
516cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			if (isdigit((unsigned char)PEEK())) {
5174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				count2 = p_count(p);
5184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				REQUIRE(count <= count2, REG_BADBR);
5194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			} else		/* single number with comma */
5204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				count2 = INFINITY;
5214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		} else		/* just a single number */
5224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			count2 = count;
523cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, pos, count, count2, 0);
5244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (!EAT('}')) {	/* error heuristics */
5254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			while (MORE() && PEEK() != '}')
5264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				NEXT();
5274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			REQUIRE(MORE(), REG_EBRACE);
5284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			SETERROR(REG_BADBR);
5294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
5304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
5314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
5324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
5334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!MORE())
5344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
5354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	c = PEEK();
5364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!( c == '*' || c == '+' || c == '?' ||
537cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	    (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) )
5384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
5394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	SETERROR(REG_BADRPT);
5404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
5414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
5424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
5434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_str - string (no metacharacters) "parser"
544cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_str(struct parse *p);
5454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
5464fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
547cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_str(
548cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
5494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
550cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
551cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
552cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
5534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(MORE(), REG_EMPTY);
5544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	while (MORE())
5554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ordinary(p, GETNEXT());
5564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
5574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
5584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
5594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_bre - BRE parser top level, anchoring and concatenation
560cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_bre(struct parse *p, int end1, \
561cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ==	int end2, size_t reclimit);
5624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Giving end1 as OUT essentially eliminates the end1/end2 check.
5634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
5644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This implementation is a bit of a kludge, in that a trailing $ is first
5654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * taken as an ordinary character and then revised to be an anchor.  The
5664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * only undesirable side effect is that '$' gets included as a character
5674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * category in such cases.  This is fairly harmless; not worth fixing.
5684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * The amount of lookahead needed to avoid this kludge is excessive.
5694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
5704fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
571cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_bre(
572cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
5734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross    int end1,		/* first terminating character */
574cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int end2,		/* second terminating character */
575cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    size_t reclimit)
5764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
577cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno start;
5784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int first = 1;			/* first subexpression? */
5794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int wasdollar = 0;
5804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
581cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
582cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
583cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
584cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p->error = REG_ESPACE;
585cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return;
586cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	}
587cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
588cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	start = HERE();
589cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
5904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (EAT('^')) {
5914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OBOL, 0);
5924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->iflags |= USEBOL;
5934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->nbol++;
5944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
5954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	while (MORE() && !SEETWO(end1, end2)) {
596cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		wasdollar = p_simp_re(p, first, reclimit);
5974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		first = 0;
5984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
5994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (wasdollar) {	/* oops, that was a trailing anchor */
6004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		DROP(1);
6014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OEOL, 0);
6024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->iflags |= USEEOL;
6034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->neol++;
6044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
6054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
6064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(HERE() != start, REG_EMPTY);	/* require nonempty */
6074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
6084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
6094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
6104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
611cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
6124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
6134fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* was the simple RE an unbackslashed $? */
614cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_simp_re(
615cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
616cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int starordinary,		/* is a leading * an ordinary character? */
617cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    size_t reclimit)
6184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
6194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int c;
6204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int count;
6214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int count2;
622cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno pos, i;
6234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno subno;
6244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	BACKSL	(1<<CHAR_BIT)
6254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
626cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
627cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
6284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	pos = HERE();		/* repetion op, if any, covers from here */
6294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
6304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(MORE());		/* caller should have ensured this */
6314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	c = GETNEXT();
6324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (c == '\\') {
6334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EESCAPE);
634cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		c = BACKSL | (unsigned char)GETNEXT();
6354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
6364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch (c) {
6374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '.':
6384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->g->cflags&REG_NEWLINE)
6394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			nonnewline(p);
6404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		else
6414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			EMIT(OANY, 0);
6424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '[':
6444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_bracket(p);
6454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'{':
6474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_BADRPT);
6484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'(':
6504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->nsub++;
6514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		subno = p->g->nsub;
6524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (subno < NPAREN)
6534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pbegin[subno] = HERE();
6544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OLPAREN, subno);
6554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* the MORE here is an error heuristic */
6564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (MORE() && !SEETWO('\\', ')'))
657cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p_bre(p, '\\', ')', reclimit);
6584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (subno < NPAREN) {
6594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pend[subno] = HERE();
6604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(p->pend[subno] != 0);
6614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
6624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(ORPAREN, subno);
6634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
6644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|')':	/* should not get here -- must be user */
6664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'}':
6674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_EPAREN);
6684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'1':
6704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'2':
6714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'3':
6724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'4':
6734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'5':
6744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'6':
6754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'7':
6764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'8':
6774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case BACKSL|'9':
6784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		i = (c&~BACKSL) - '0';
6794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		assert(i < NPAREN);
6804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->pend[i] != 0) {
6814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(i <= p->g->nsub);
6824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			EMIT(OBACK_, i);
6834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(p->pbegin[i] != 0);
6844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
6854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			assert(OP(p->strip[p->pend[i]]) == ORPAREN);
6864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			(void) dupl(p, p->pbegin[i]+1, p->pend[i]);
6874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			EMIT(O_BACK, i);
6884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		} else
6894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			SETERROR(REG_ESUBREG);
6904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->backrefs = 1;
6914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '*':
6934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(starordinary, REG_BADRPT);
6944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* FALLTHROUGH */
6954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	default:
696cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		ordinary(p, c &~ BACKSL);
6974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
6984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
6994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (EAT('*')) {		/* implemented as +? */
7014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* this case does not require the (y|) trick, noKLUDGE */
7024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OPLUS_, pos);
7034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_PLUS, pos);
7044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OQUEST_, pos);
7054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_QUEST, pos);
7064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	} else if (EATTWO('\\', '{')) {
7074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		count = p_count(p);
7084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (EAT(',')) {
709cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			if (MORE() && isdigit((unsigned char)PEEK())) {
7104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				count2 = p_count(p);
7114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				REQUIRE(count <= count2, REG_BADBR);
7124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			} else		/* single number with comma */
7134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				count2 = INFINITY;
7144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		} else		/* just a single number */
7154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			count2 = count;
716cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, pos, count, count2, 0);
7174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (!EATTWO('\\', '}')) {	/* error heuristics */
7184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			while (MORE() && !SEETWO('\\', '}'))
7194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				NEXT();
7204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			REQUIRE(MORE(), REG_EBRACE);
7214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			SETERROR(REG_BADBR);
7224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
723cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	} else if (c == (unsigned char)'$')	/* $ (but not \$) ends it */
7244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(1);
7254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(0);
7274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
7284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
7304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_count - parse a repetition count
731cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int p_count(struct parse *p);
7324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
7334fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* the value */
734cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_count(
735cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
7364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
7374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int count = 0;
7384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int ndigits = 0;
7394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
740cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
741cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
742cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) {
7434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		count = count*10 + (GETNEXT() - '0');
7444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ndigits++;
7454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
7464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
7484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(count);
7494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
7504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
7524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_bracket - parse a bracketed character list
753cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_bracket(struct parse *p);
7544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
7554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Note a significant property of this code:  if the allocset() did SETERROR,
7564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * no set operations are done.
7574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
7584fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
759cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_bracket(
760cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
7614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
7624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cset *cs;
7634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int invert = 0;
764cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
765cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
766cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cs = allocset(p);
767cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (cs == NULL)
768cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return;
7694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* Dept of Truly Sickening Special-Case Kludges */
771cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]",
772cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes					    (size_t)6) == 0) {
7734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OBOW, 0);
7744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXTn(6);
7754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
7764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
777cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]",
778cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes					    (size_t)6) == 0) {
7794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OEOW, 0);
7804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXTn(6);
7814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
7824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
7834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (EAT('^'))
7854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		invert++;	/* make note to invert set at end */
7864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (EAT(']'))
7874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		CHadd(cs, ']');
7884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else if (EAT('-'))
7894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		CHadd(cs, '-');
7904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
7914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_b_term(p, cs);
7924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (EAT('-'))
7934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		CHadd(cs, '-');
7944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	MUSTEAT(']', REG_EBRACK);
7954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
796cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (p->error != 0)	/* don't mess things up further */
7974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
7984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
7994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->g->cflags&REG_ICASE) {
800cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		ssize_t i;
8014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		int ci;
8024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		for (i = p->g->csetsize - 1; i >= 0; i--)
8044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (CHIN(cs, i) && isalpha(i)) {
805cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes				ci = othercase((int)i);
8064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				if (ci != i)
8074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross					CHadd(cs, ci);
8084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			}
8094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (cs->multis != NULL)
8104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			mccase(p, cs);
8114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
8124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (invert) {
813cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		ssize_t i;
8144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		for (i = p->g->csetsize - 1; i >= 0; i--)
8164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (CHIN(cs, i))
817cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes				CHsub(cs, (int)i);
8184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			else
819cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes				CHadd(cs, (int)i);
8204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->g->cflags&REG_NEWLINE)
8214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			CHsub(cs, '\n');
8224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (cs->multis != NULL)
8234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			mcinvert(p, cs);
8244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
8254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(cs->multis == NULL);		/* xxx */
8274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (nch(p, cs) == 1) {		/* optimize singleton sets */
8294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ordinary(p, firstch(p, cs));
8304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		freeset(p, cs);
8314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	} else
8324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OANYOF, freezeset(p, cs));
8334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
8344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
8364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_term - parse one term of a bracketed character list
837cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_term(struct parse *p, cset *cs);
8384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
8394fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
840cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_term(
841cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
842cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
8434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
8444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char c;
8454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char start, finish;
8464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int i;
8474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
848cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
849cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
850cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
8514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* classify what we've got */
8524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch ((MORE()) ? PEEK() : '\0') {
8534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '[':
8544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		c = (MORE2()) ? PEEK2() : '\0';
8554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
856cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
8574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '-':
8584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ERANGE);
8594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;			/* NOTE RETURN */
860cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
8614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	default:
8624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		c = '\0';
8634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
8644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
8654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
8664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch (c) {
8674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case ':':		/* character class */
8684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXT2();
8694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EBRACK);
8704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		c = PEEK();
8714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(c != '-' && c != ']', REG_ECTYPE);
8724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_b_cclass(p, cs);
8734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EBRACK);
8744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
8754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
8764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case '=':		/* equivalence class */
8774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXT2();
8784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EBRACK);
8794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		c = PEEK();
8804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
8814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p_b_eclass(p, cs);
8824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(MORE(), REG_EBRACK);
8834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
8844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
8854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	default:		/* symbol, ordinary character, or range */
8864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* xxx revision needed for multichar stuff */
8874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		start = p_b_symbol(p);
8884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (SEE('-') && MORE2() && PEEK2() != ']') {
8894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			/* range */
8904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			NEXT();
8914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (EAT('-'))
8924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				finish = '-';
8934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			else
8944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				finish = p_b_symbol(p);
8954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		} else
8964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			finish = start;
8974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* xxx what about signed chars here... */
8984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		REQUIRE(start <= finish, REG_ERANGE);
8994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		for (i = start; i <= finish; i++)
9004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			CHadd(cs, i);
9014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
9024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
9034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
9044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
9064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_cclass - parse a character-class name and deal with it
907cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_cclass(struct parse *p, cset *cs);
9084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
9094fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
910cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_cclass(
911cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
912cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
9134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
914cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *sp;
91550ace4fec5e8cb5afcbc656a4556fa528adfd760David 'Digit' Turner	const struct cclass *cp;
9164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	size_t len;
917cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *u;
9184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char c;
9194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
920cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
921cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
922cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
923cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sp = p->next;
924cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
925cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	while (MORE() && isalpha((unsigned char)PEEK()))
9264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXT();
9274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	len = p->next - sp;
9284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (cp = cclasses; cp->name != NULL; cp++)
9294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
9304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
9314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (cp->name == NULL) {
9324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* oops, didn't find it */
9334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ECTYPE);
9344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
9354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
9364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	u = cp->chars;
9384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	while ((c = *u++) != '\0')
9394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		CHadd(cs, c);
9404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
9414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		MCadd(p, cs, u);
9424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
9434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
9454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_eclass - parse an equivalence-class name and deal with it
946cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_eclass(struct parse *p, cset *cs);
9474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
9484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This implementation is incomplete. xxx
9494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
9504fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
951cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_eclass(
952cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
953cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
9544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
9554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char c;
9564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
957cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
958cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
959cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
9604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	c = p_b_coll_elem(p, '=');
9614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	CHadd(cs, c);
9624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
9634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
9654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
966cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char p_b_symbol(struct parse *p);
9674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
9684fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char			/* value of symbol */
969cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_symbol(
970cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
9714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
9724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char value;
9734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
974cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
975cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
9764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(MORE(), REG_EBRACK);
9774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!EATTWO('[', '.'))
9784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(GETNEXT());
9794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* collating symbol */
9814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	value = p_b_coll_elem(p, '.');
9824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
9834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(value);
9844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
9854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
9864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
9874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_coll_elem - parse a collating-element name and look it up
988cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char p_b_coll_elem(struct parse *p, int endc);
9894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
9904fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char			/* value of collating element */
991cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_coll_elem(
992cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
9934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross    int endc)			/* name ended by endc,']' */
9944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
995cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *sp;
99650ace4fec5e8cb5afcbc656a4556fa528adfd760David 'Digit' Turner	const struct cname *cp;
997cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t len;
998cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
999cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1000cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1001cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sp = p->next;
10024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
10034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	while (MORE() && !SEETWO(endc, ']'))
10044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		NEXT();
10054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (!MORE()) {
10064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_EBRACK);
10074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(0);
10084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
10094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	len = p->next - sp;
10104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (cp = cnames; cp->name != NULL; cp++)
10114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
10124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			return(cp->code);	/* known name */
10134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (len == 1)
10144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(*sp);	/* single character */
10154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	SETERROR(REG_ECOLLATE);			/* neither */
10164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(0);
10174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
10184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
10194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
10204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - othercase - return the case counterpart of an alphabetic
1021cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int othercase(int ch);
10224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
1023cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int			/* if no counterpart, return ch */
1024cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesothercase(
1025cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int ch)
10264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
10274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(isalpha(ch));
10284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (isupper(ch))
1029cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return(tolower(ch));
10304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else if (islower(ch))
1031cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return(toupper(ch));
10324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else			/* peculiar, but could happen */
10334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(ch);
10344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
10354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
10364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
10374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - bothcases - emit a dualcase version of a two-case character
1038cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void bothcases(struct parse *p, int ch);
10394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
10404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Boy, is this implementation ever a kludge...
10414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
10424fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1043cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesbothcases(
1044cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1045cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int ch)
10464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1047cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *oldnext;
1048cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *oldend;
10494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char bracket[3];
10504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1051cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1052cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1053cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	oldnext = p->next;
1054cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	oldend = p->end;
1055cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
10564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(othercase(ch) != ch);	/* p_bracket() would recurse */
10574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->next = bracket;
10584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = bracket+2;
10594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[0] = ch;
10604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[1] = ']';
10614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[2] = '\0';
10624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p_bracket(p);
10634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(p->next == bracket+2);
10644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->next = oldnext;
10654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = oldend;
10664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
10674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
10684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
10694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - ordinary - emit an ordinary character
1070cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void ordinary(struct parse *p, int ch);
10714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
10724fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1073cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesordinary(
1074cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1075cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int ch)
10764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1077cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cat_t *cap;
1078cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1079cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
10804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1081cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cap = p->g->categories;
1082cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char) ch)
1083cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	    && othercase((unsigned char) ch) != (unsigned char) ch)
1084cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		bothcases(p, (unsigned char) ch);
10854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	else {
1086cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		EMIT(OCHAR, (sopno)(unsigned char)ch);
1087cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (cap[ch] == 0) {
1088cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			_DIAGASSERT(__type_fit(unsigned char,
1089cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			    p->g->ncategories + 1));
1090cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			cap[ch] = (unsigned char)p->g->ncategories++;
1091cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		}
10924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
10934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
10944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
10954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
10964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - nonnewline - emit REG_NEWLINE version of OANY
1097cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void nonnewline(struct parse *p);
10984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
10994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Boy, is this implementation ever a kludge...
11004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
11014fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1102cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesnonnewline(
1103cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
11044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1105cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *oldnext;
1106cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	const char *oldend;
11074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char bracket[4];
11084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1109cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1110cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1111cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	oldnext = p->next;
1112cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	oldend = p->end;
1113cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
11144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->next = bracket;
11154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = bracket+3;
11164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[0] = '^';
11174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[1] = '\n';
11184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[2] = ']';
11194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	bracket[3] = '\0';
11204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p_bracket(p);
11214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(p->next == bracket+3);
11224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->next = oldnext;
11234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = oldend;
11244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
11254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
11264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
11274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - repeat - generate code for a bounded repetition, recursively if needed
1128cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void repeat(struct parse *p, sopno start, int from, int to,
1129cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == size_t reclimit);
11304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
11314fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1132cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesrepeat(
1133cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
11344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross    sopno start,		/* operand from here to end of strip */
11354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross    int from,			/* repeated from this number */
1136cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int to,			/* to this number of times (maybe INFINITY) */
1137cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    size_t reclimit)
11384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1139cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno finish;
11404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	N	2
11414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	INF	3
11424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	REP(f, t)	((f)*8 + (t))
11434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
11444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno copy;
11454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1146cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1147cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1148cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (reclimit++ > RECLIMIT)
1149cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p->error = REG_ESPACE;
1150cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (p->error)
11514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
11524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1153cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	finish = HERE();
1154cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
11554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(from <= to);
11564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
11574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	switch (REP(MAP(from), MAP(to))) {
11584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(0, 0):			/* must be user doing this */
11594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		DROP(finish-start);	/* drop the operand */
11604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(0, 1):			/* as x{1,1}? */
11624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(0, N):			/* as x{1,n}? */
11634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(0, INF):		/* as x{1,}? */
11644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OCH_, start);		/* offset is wrong... */
1166cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, start+1, 1, to, reclimit);
11674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(OOR1, start);
11684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(start);			/* ... fix it */
11694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OOR2, 0);
11704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(THERE());
11714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_CH, THERETHERE());
11724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(1, 1):			/* trivial case */
11744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* done */
11754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(1, N):			/* as x?x{1,n-1} */
11774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
11784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OCH_, start);
11794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(OOR1, start);
11804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(start);
11814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		EMIT(OOR2, 0);			/* offset very wrong... */
11824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		AHEAD(THERE());			/* ...so fix it */
11834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_CH, THERETHERE());
11844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		copy = dupl(p, start+1, finish+1);
11854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		assert(copy == finish+4);
1186cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, copy, 1, to-1, reclimit);
11874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(1, INF):		/* as x+ */
11894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		INSERT(OPLUS_, start);
11904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		ASTERN(O_PLUS, start);
11914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(N, N):			/* as xx{m-1,n-1} */
11934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		copy = dupl(p, start, finish);
1194cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, copy, from-1, to-1, reclimit);
11954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
11964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	case REP(N, INF):		/* as xx{n-1,INF} */
11974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		copy = dupl(p, start, finish);
1198cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		repeat(p, copy, from-1, to, reclimit);
11994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
12004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	default:			/* "can't happen" */
12014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ASSERT);	/* just in case */
12024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		break;
12034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
12044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
12054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
12064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
12074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - seterr - set an error condition
1208cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int seterr(struct parse *p, int e);
12094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
12104fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* useless but makes type checking happy */
1211cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesseterr(
1212cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1213cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int e)
12144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1215cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1216cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1217cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
12184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error == 0)	/* keep earliest error condition */
12194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->error = e;
12204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->next = nuls;		/* try to bring things to a halt */
12214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->end = nuls;
12224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(0);		/* make the return value well-defined */
12234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
12244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
12254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
12264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - allocset - allocate a set of characters for []
1227cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static cset *allocset(struct parse *p);
12284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
12294fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic cset *
1230cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesallocset(
1231cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p)
12324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1233cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t no;
12344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	size_t nc;
12354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	size_t nbytes;
12364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cset *cs;
1237cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t css;
1238cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
12394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1240cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
12414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1242cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	no = p->g->ncsets++;
1243cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	css = (size_t)p->g->csetsize;
1244cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (no >= p->ncsalloc) {	/* need another column of space */
12454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->ncsalloc += CHAR_BIT;
12464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		nc = p->ncsalloc;
12474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		assert(nc % CHAR_BIT == 0);
12484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		nbytes = nc / CHAR_BIT * css;
1249cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (MEMSIZE(p) > MEMLIMIT)
1250cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			goto oomem;
1251cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (p->g->sets == NULL)
1252cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p->g->sets = malloc(nc * sizeof(cset));
1253cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		else
1254cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p->g->sets = realloc(p->g->sets, nc * sizeof(cset));
1255cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (p->g->setbits == NULL)
1256cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p->g->setbits = malloc(nbytes);
1257cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		else {
1258cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			p->g->setbits = realloc(p->g->setbits, nbytes);
1259cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			/* xxx this isn't right if setbits is now NULL */
1260cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			for (i = 0; i < no; i++)
1261cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes				p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1262cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		}
1263cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (p->g->sets != NULL && p->g->setbits != NULL)
1264cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			(void) memset((char *)p->g->setbits + (nbytes - css),
1265cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes								0, css);
1266cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		else {
1267cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesoomem:
1268cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			no = 0;
1269cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			SETERROR(REG_ESPACE);
1270cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			/* caller's responsibility not to do set ops */
1271cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			return NULL;
1272cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		}
12734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
12744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
12754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs = &p->g->sets[no];
12764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1277cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cs->mask = 1 << (unsigned int)((no) % CHAR_BIT);
12784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs->hash = 0;
12794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs->smultis = 0;
12804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs->multis = NULL;
12814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
12824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(cs);
12834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
12844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
12854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
12864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - freeset - free a now-unused set
1287cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void freeset(struct parse *p, cset *cs);
12884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
12894fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1290cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfreeset(
1291cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1292cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
12934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1294cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1295cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cset *top;
1296cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t css;
1297cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1298cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1299cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
13004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1301cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	top = &p->g->sets[p->g->ncsets];
1302cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	css = (size_t)p->g->csetsize;
1303cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1304cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	for (i = 0; i < css; i++)
1305cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		CHsub(cs, (int)i);
13064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (cs == top-1)	/* recover only the easy case */
13074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		p->g->ncsets--;
13084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
13094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
13104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
13114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - freezeset - final processing on a set of characters
1312cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int freezeset(struct parse *p, cset *cs);
13134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
13144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * The main task here is merging identical sets.  This is usually a waste
13154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * of time (although the hash code minimizes the overhead), but can win
13164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
13174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is done using addition rather than xor -- all ASCII [aA] sets xor to
13184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * the same value!
13194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
1320cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno			/* set number */
1321cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfreezeset(
1322cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1323cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
13244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1325cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	uch h;
1326cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1327cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cset *top;
13284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cset *cs2;
1329cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t css;
1330cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1331cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1332cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1333cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1334cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	h = cs->hash;
1335cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	top = &p->g->sets[p->g->ncsets];
1336cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	css = (size_t)p->g->csetsize;
13374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
13384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* look for an earlier one which is the same */
13394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
13404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (cs2->hash == h && cs2 != cs) {
13414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			/* maybe */
1342cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			for (i = 0; i < css; i++)
13434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				if (!!CHIN(cs2, i) != !!CHIN(cs, i))
13444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross					break;		/* no */
1345cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			if (i == css)
13464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				break;			/* yes */
13474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
13484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
13494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (cs2 < top) {	/* found one */
13504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		freeset(p, cs);
13514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		cs = cs2;
13524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
13534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1354cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	return (sopno)(cs - p->g->sets);
13554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
13564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
13574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
13584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - firstch - return first character in a set (which must have at least one)
1359cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int firstch(struct parse *p, cset *cs);
13604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
13614fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* character; there is no "none" value */
1362cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfirstch(
1363cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1364cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
13654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1366cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1367cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t css;
1368cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1369cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1370cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
13714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1372cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	css = (size_t)p->g->csetsize;
1373cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1374cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	for (i = 0; i < css; i++)
13754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (CHIN(cs, i))
13764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			return((char)i);
13774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(never);
13784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(0);		/* arbitrary */
13794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
13804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
13814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
13824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - nch - number of characters in a set
1383cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int nch(struct parse *p, cset *cs);
13844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
13854fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int
1386cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesnch(
1387cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1388cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
13894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1390cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1391cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t css;
13924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int n = 0;
13934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1394cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1395cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1396cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1397cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	css = (size_t)p->g->csetsize;
1398cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1399cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	for (i = 0; i < css; i++)
14004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (CHIN(cs, i))
14014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			n++;
14024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(n);
14034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
14044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
14054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
14064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mcadd - add a collating element to a cset
1407cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcadd(struct parse *p, cset *cs, \
1408cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ==	char *cp);
14094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
14104fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1411cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcadd(
1412cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1413cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs,
1414cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    const char *cp)
14154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1416cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t oldend;
1417cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1418cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1419cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1420cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cp != NULL);
1421cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1422cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	oldend = cs->smultis;
14234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
14244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cs->smultis += strlen(cp) + 1;
1425cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (cs->multis == NULL)
1426cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		cs->multis = malloc(cs->smultis);
1427cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	else
1428cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		cs->multis = realloc(cs->multis, cs->smultis);
1429cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (cs->multis == NULL) {
14304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ESPACE);
14314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
14324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
14334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1434cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	(void) strcpy(cs->multis + oldend - 1, cp);
1435cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cs->multis[cs->smultis - 1] = '\0';
14364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
14374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1438cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0
1439cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*
1440cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcsub - subtract a collating element from a cset
1441cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcsub(cset *cs, char *cp);
1442cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */
1443cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void
1444cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcsub(
1445cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs,
1446cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    char *cp)
1447cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{
1448cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	char *fp;
1449cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t len;
1450cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1451cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1452cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cp != NULL);
1453cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1454cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	fp = mcfind(cs, cp);
1455cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	len = strlen(fp);
1456cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1457cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	assert(fp != NULL);
1458cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	(void) memmove(fp, fp + len + 1,
1459cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes				cs->smultis - (fp + len + 1 - cs->multis));
1460cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cs->smultis -= len;
1461cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1462cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (cs->smultis == 0) {
1463cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		free(cs->multis);
1464cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		cs->multis = NULL;
1465cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return;
1466cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	}
1467cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1468cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cs->multis = realloc(cs->multis, cs->smultis);
1469cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	assert(cs->multis != NULL);
1470cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes}
1471cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1472cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*
1473cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcin - is a collating element in a cset?
1474cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int mcin(cset *cs, char *cp);
1475cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */
1476cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int
1477cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcin(
1478cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs,
1479cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    char *cp)
1480cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{
1481cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1482cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1483cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cp != NULL);
1484cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1485cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	return(mcfind(cs, cp) != NULL);
1486cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes}
1487cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1488cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*
1489cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcfind - find a collating element in a cset
1490cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char *mcfind(cset *cs, char *cp);
1491cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */
1492cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char *
1493cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcfind(
1494cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs,
1495cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    char *cp)
1496cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{
1497cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	char *p;
1498cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1499cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1500cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cp != NULL);
1501cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1502cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (cs->multis == NULL)
1503cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return(NULL);
1504cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1505cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (strcmp(cp, p) == 0)
1506cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			return(p);
1507cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	return(NULL);
1508cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes}
1509cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif
1510cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
15114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
15124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mcinvert - invert the list of collating elements in a cset
1513cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcinvert(struct parse *p, cset *cs);
15144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
15154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This would have to know the set of possibilities.  Implementation
15164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is deferred.
15174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
15184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* ARGSUSED */
15194fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1520cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcinvert(
1521cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1522cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
15234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1524cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1525cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1526cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1527cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
15284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(cs->multis == NULL);	/* xxx */
15294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
15304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
15314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
15324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mccase - add case counterparts of the list of collating elements in a cset
1533cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mccase(struct parse *p, cset *cs);
15344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
15354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This would have to know the set of possibilities.  Implementation
15364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is deferred.
15374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
15384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* ARGSUSED */
15394fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1540cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmccase(
1541cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1542cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    cset *cs)
15434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1544cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1545cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1546cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(cs != NULL);
1547cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
15484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(cs->multis == NULL);	/* xxx */
15494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
15504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
15514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
15524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - isinsets - is this character in any sets?
1553cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int isinsets(struct re_guts *g, int c);
15544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
15554fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* predicate */
1556cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesisinsets(
1557cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g,
1558cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int c)
15594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
15604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	uch *col;
1561cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1562cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t ncols;
1563cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	unsigned uc = (unsigned char)c;
1564cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1565cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1566cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1567cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (g->setbits == NULL)
1568cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return 0;
1569cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1570cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
15714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
15724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
15734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (col[uc] != 0)
15744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			return(1);
15754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(0);
15764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
15774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
15784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
15794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - samesets - are these two characters in exactly the same sets?
1580cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int samesets(struct re_guts *g, int c1, int c2);
15814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
15824fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int			/* predicate */
1583cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughessamesets(
1584cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g,
1585cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int c1,
1586cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    int c2)
15874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
15884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	uch *col;
1589cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t i;
1590cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	size_t ncols;
1591cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	unsigned uc1 = (unsigned char)c1;
1592cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	unsigned uc2 = (unsigned char)c2;
1593cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1594cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1595cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1596cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
15974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
15984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
15994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (col[uc1] != col[uc2])
16004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			return(0);
16014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(1);
16024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
16034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
16054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - categorize - sort out character categories
1606cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void categorize(struct parse *p, struct re_guts *g);
16074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
16084fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1609cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughescategorize(
1610cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1611cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g)
16124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1613cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cat_t *cats;
16144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int c;
16154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int c2;
16164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cat_t cat;
16174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1618cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1619cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1620cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1621cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	cats = g->categories;
1622cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
16234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* avoid making error situations worse */
16244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
16254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
16264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (c = CHAR_MIN; c <= CHAR_MAX; c++)
16284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (cats[c] == 0 && isinsets(g, c)) {
1629cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			_DIAGASSERT(__type_fit(unsigned char,
1630cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			    g->ncategories + 1));
16314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			cat = g->ncategories++;
16324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			cats[c] = cat;
16334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			for (c2 = c+1; c2 <= CHAR_MAX; c2++)
16344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				if (cats[c2] == 0 && samesets(g, c, c2))
16354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross					cats[c2] = cat;
16364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
16374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
16384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
16404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - dupl - emit a duplicate of a bunch of sops
1641cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static sopno dupl(struct parse *p, sopno start, sopno finish);
16424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
16434fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic sopno			/* start of duplicate */
1644cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdupl(
1645cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1646cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno start,			/* from here */
1647cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno finish)			/* to this less one */
16484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1649cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno ret;
16504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno len = finish - start;
16514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1652cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1653cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1654cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	ret = HERE();
1655cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
16564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(finish >= start);
16574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (len == 0)
16584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(ret);
1659cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (!enlarge(p, p->ssize + len))/* this many unexpected additions */
1660cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return ret;
1661cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	(void)memcpy(p->strip + p->slen, p->strip + start,
1662cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	    (size_t)len * sizeof(sop));
16634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->slen += len;
16644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(ret);
16654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
16664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
16684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - doemit - emit a strip operator
1669cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void doemit(struct parse *p, sop op, size_t opnd);
16704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
16714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * It might seem better to implement this as a macro with a function as
16724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * hard-case backup, but it's just too big and messy unless there are
16734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * some changes to the data structures.  Maybe later.
16744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
16754fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1676cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdoemit(
1677cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1678cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sop op,
1679cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno opnd)
16804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1681cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1682cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
16834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* avoid making error situations worse */
16844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
16854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
16864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* deal with oversize operands ("can't happen", more or less) */
16884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(opnd < 1<<OPSHIFT);
16894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* deal with undersized strip */
16914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->slen >= p->ssize)
1692cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		if (!enlarge(p, (p->ssize+1) / 2 * 3))	/* +50% */
1693cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			return;
16944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* finally, it's all reduced to the easy case */
1696cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	p->strip[p->slen++] = (sop)SOP(op, opnd);
16974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
16984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
16994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
17004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - doinsert - insert a sop into the strip
1701cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
17024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
17034fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1704cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdoinsert(
1705cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1706cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sop op,
1707cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno opnd,
1708cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno pos)
17094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
17104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno sn;
17114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop s;
17124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	int i;
17134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1714cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1715cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
17164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* avoid making error situations worse */
17174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
17184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
17194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sn = HERE();
17214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	EMIT(op, opnd);		/* do checks, ensure space */
17224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(HERE() == sn+1);
17234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	s = p->strip[sn];
17244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* adjust paren pointers */
17264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(pos > 0);
17274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (i = 1; i < NPAREN; i++) {
17284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->pbegin[i] >= pos) {
17294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pbegin[i]++;
17304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
17314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		if (p->pend[i] >= pos) {
17324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			p->pend[i]++;
17334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
17344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
17354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1736cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop));
17374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->strip[pos] = s;
17384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
17394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
17414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - dofwd - complete a forward reference
1742cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void dofwd(struct parse *p, sopno pos, sop value);
17434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
17444fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1745cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdofwd(
1746cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1747cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno pos,
1748cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno value)
17494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1750cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1751cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1752cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
17534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* avoid making error situations worse */
17544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
17554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
17564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(value < 1<<OPSHIFT);
1758cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	p->strip[pos] = (sop)(OP(p->strip[pos]) | value);
17594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
17604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
17624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - enlarge - enlarge the strip
1763cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void enlarge(struct parse *p, sopno size);
17644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
1765cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int
1766cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesenlarge(
1767cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1768cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    sopno size)
17694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
17704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop *sp;
1771cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sopno osize;
1772cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1773cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
17744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->ssize >= size)
1776cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return 1;
17774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1778cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	osize = p->ssize;
1779cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	p->ssize = size;
1780cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (MEMSIZE(p) > MEMLIMIT)
1781cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		goto oomem;
1782cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sp = realloc(p->strip, p->ssize * sizeof(sop));
17834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (sp == NULL) {
1784cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesoomem:
1785cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		p->ssize = osize;
17864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ESPACE);
1787cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		return 0;
17884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
17894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	p->strip = sp;
1790cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	return 1;
17914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
17924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
17934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
17944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - stripsnug - compact the strip
1795cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void stripsnug(struct parse *p, struct re_guts *g);
17964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
17974fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1798cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstripsnug(
1799cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1800cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g)
18014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
1802cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1803cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1804cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1805cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
18064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->nstates = p->slen;
1807cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	g->strip = realloc(p->strip, p->slen * sizeof(sop));
18084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (g->strip == NULL) {
18094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		SETERROR(REG_ESPACE);
18104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		g->strip = p->strip;
18114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
18124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
18134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
18144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
18154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - findmust - fill in must and mlen with longest mandatory literal string
1816cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void findmust(struct parse *p, struct re_guts *g);
18174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
18184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This algorithm could do fancy things like analyzing the operands of |
18194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * for common subsequences.  Someday.  This code is simple and finds most
18204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * of the interesting cases.
18214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *
18224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Note that must and mlen got initialized during setup.
18234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
18244fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void
1825cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfindmust(
1826cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1827cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g)
18284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
18294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop *scan;
1830cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sop *start = NULL;
1831cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	sop *newstart = NULL;
18324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno newlen;
18334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop s;
18344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	char *cp;
18354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno i;
18364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1837cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1838cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1839cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
18404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* avoid making error situations worse */
18414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
18424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
18434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
18444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* find the longest OCHAR sequence in strip */
18454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	newlen = 0;
18464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	scan = g->strip + 1;
18474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	do {
18484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		s = *scan++;
18494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		switch (OP(s)) {
18504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OCHAR:		/* sequence member */
18514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (newlen == 0)		/* new sequence */
18524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				newstart = scan - 1;
18534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			newlen++;
18544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
18554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OPLUS_:		/* things that don't break one */
18564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OLPAREN:
18574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case ORPAREN:
18584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
18594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OQUEST_:		/* things that must be skipped */
18604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OCH_:
18614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			scan--;
18624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			do {
18634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				scan += OPND(s);
18644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				s = *scan;
18654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				/* assert() interferes w debug printouts */
18664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				if (OP(s) != O_QUEST && OP(s) != O_CH &&
18674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross							OP(s) != OOR2) {
18684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross					g->iflags |= BAD;
18694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross					return;
18704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				}
18714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			} while (OP(s) != O_QUEST && OP(s) != O_CH);
1872cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes			/* FALLTHROUGH */
18734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		default:		/* things that break a sequence */
18744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (newlen > g->mlen) {		/* ends one */
18754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				start = newstart;
18764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				g->mlen = newlen;
18774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			}
18784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			newlen = 0;
18794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
18804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
18814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	} while (OP(s) != OEND);
18824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1883cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (start == NULL)
1884cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes		g->mlen = 0;
1885cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
1886cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	if (g->mlen == 0)	/* there isn't one */
18874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
18884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
18894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	/* turn it into a character string */
18904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	g->must = malloc((size_t)g->mlen + 1);
18914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (g->must == NULL) {		/* argh; just forget it */
18924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		g->mlen = 0;
18934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return;
18944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
18954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	cp = g->must;
18964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	scan = start;
18974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	for (i = g->mlen; i > 0; i--) {
18984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		while (OP(s = *scan++) != OCHAR)
18994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			continue;
19004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		assert(cp < g->must + g->mlen);
19014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		*cp++ = (char)OPND(s);
19024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	}
19034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	assert(cp == g->must + g->mlen);
19044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	*cp++ = '\0';		/* just on general principles */
19054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
19064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
19074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*
19084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - pluscount - count + nesting
1909cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static sopno pluscount(struct parse *p, struct re_guts *g);
19104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */
19114fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic sopno			/* nesting depth */
1912cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughespluscount(
1913cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct parse *p,
1914cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes    struct re_guts *g)
19154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{
19164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop *scan;
19174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sop s;
19184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno plusnest = 0;
19194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	sopno maxnest = 0;
19204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
1921cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(p != NULL);
1922cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes	_DIAGASSERT(g != NULL);
1923cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes
19244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (p->error != 0)
19254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		return(0);	/* there may not be an OEND */
19264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross
19274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	scan = g->strip + 1;
19284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	do {
19294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		s = *scan++;
19304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		switch (OP(s)) {
19314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case OPLUS_:
19324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			plusnest++;
19334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
19344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		case O_PLUS:
19354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			if (plusnest > maxnest)
19364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross				maxnest = plusnest;
19374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			plusnest--;
19384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross			break;
19394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		}
19404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	} while (OP(s) != OEND);
19414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	if (plusnest != 0)
19424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross		g->iflags |= BAD;
19434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross	return(maxnest);
19444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}
1945