1cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* $NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $ */ 2cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 34fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/*- 44fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Copyright (c) 1992, 1993, 1994 54fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * The Regents of the University of California. All rights reserved. 64fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 74fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This code is derived from software contributed to Berkeley by 84fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Henry Spencer. 94fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Redistribution and use in source and binary forms, with or without 114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * modification, are permitted provided that the following conditions 124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * are met: 134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 1. Redistributions of source code must retain the above copyright 144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * notice, this list of conditions and the following disclaimer. 154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 2. Redistributions in binary form must reproduce the above copyright 164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * notice, this list of conditions and the following disclaimer in the 174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * documentation and/or other materials provided with the distribution. 184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 3. Neither the name of the University nor the names of its contributors 194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * may be used to endorse or promote products derived from this software 204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * without specific prior written permission. 214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * SUCH DAMAGE. 334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 37cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/*- 38cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Copyright (c) 1992, 1993, 1994 Henry Spencer. 39cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 40cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * This code is derived from software contributed to Berkeley by 41cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Henry Spencer. 42cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 43cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * Redistribution and use in source and binary forms, with or without 44cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * modification, are permitted provided that the following conditions 45cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * are met: 46cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 1. Redistributions of source code must retain the above copyright 47cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * notice, this list of conditions and the following disclaimer. 48cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 2. Redistributions in binary form must reproduce the above copyright 49cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * notice, this list of conditions and the following disclaimer in the 50cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * documentation and/or other materials provided with the distribution. 51cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 3. All advertising materials mentioning features or use of this software 52cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * must display the following acknowledgement: 53cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * This product includes software developed by the University of 54cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * California, Berkeley and its contributors. 55cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 4. Neither the name of the University nor the names of its contributors 56cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * may be used to endorse or promote products derived from this software 57cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * without specific prior written permission. 58cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 59cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * SUCH DAMAGE. 70cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * 71cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 72cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */ 73cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 74cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <sys/cdefs.h> 75cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if defined(LIBC_SCCS) && !defined(lint) 76cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0 77cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; 78cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#else 79cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes__RCSID("$NetBSD: regcomp.c,v 1.33 2012/03/13 21:13:43 christos Exp $"); 80cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 81cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif /* LIBC_SCCS and not lint */ 82cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 83cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include "namespace.h" 844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <sys/types.h> 85cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 86cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <assert.h> 874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <ctype.h> 884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <limits.h> 89cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <stdio.h> 904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <stdlib.h> 91cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#include <string.h> 924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include <regex.h> 934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 94cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __weak_alias 95cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes__weak_alias(regcomp,_regcomp) 96cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 97cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "utils.h" 994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "regex2.h" 1004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "cclass.h" 1024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#include "cname.h" 1034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 1054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * parse structure, passed up and down to avoid global variables and 1064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * other clumsinesses 1074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 1084fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstruct parse { 109cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *next; /* next character in RE */ 110cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *end; /* end of string (-> NUL normally) */ 1114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int error; /* has an error been seen? */ 1124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop *strip; /* malloced strip */ 1134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno ssize; /* malloced strip size (allocated) */ 1144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno slen; /* malloced strip length (used) */ 115cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t ncsalloc; /* number of csets allocated */ 1164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross struct re_guts *g; 1174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 1184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 1194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno pend[NPAREN]; /* -> ) ([0] unused) */ 1204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross}; 1214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 122cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* ========= begin header generated by ./mkh ========= */ 123cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __cplusplus 124cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesextern "C" { 125cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 126cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 127cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* === regcomp.c === */ 128cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_ere(struct parse *p, int stop, size_t reclimit); 129cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_ere_exp(struct parse *p, size_t reclimit); 130cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_str(struct parse *p); 131cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_bre(struct parse *p, int end1, int end2, size_t reclimit); 132cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int p_simp_re(struct parse *p, int starordinary, size_t reclimit); 133cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int p_count(struct parse *p); 134cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_bracket(struct parse *p); 135cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_term(struct parse *p, cset *cs); 136cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_cclass(struct parse *p, cset *cs); 137cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void p_b_eclass(struct parse *p, cset *cs); 138cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char p_b_symbol(struct parse *p); 139cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char p_b_coll_elem(struct parse *p, int endc); 140cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int othercase(int ch); 141cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void bothcases(struct parse *p, int ch); 142cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void ordinary(struct parse *p, int ch); 143cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void nonnewline(struct parse *p); 144cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); 145cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int seterr(struct parse *p, int e); 146cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic cset *allocset(struct parse *p); 147cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void freeset(struct parse *p, cset *cs); 148cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno freezeset(struct parse *p, cset *cs); 149cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int firstch(struct parse *p, cset *cs); 150cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int nch(struct parse *p, cset *cs); 151cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcadd(struct parse *p, cset *cs, const char *cp); 152cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0 153cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcsub(cset *cs, char *cp); 154cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int mcin(cset *cs, char *cp); 155cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char *mcfind(cset *cs, char *cp); 156cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 157cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mcinvert(struct parse *p, cset *cs); 158cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void mccase(struct parse *p, cset *cs); 159cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int isinsets(struct re_guts *g, int c); 160cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int samesets(struct re_guts *g, int c1, int c2); 161cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void categorize(struct parse *p, struct re_guts *g); 162cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno dupl(struct parse *p, sopno start, sopno finish); 163cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void doemit(struct parse *p, sop op, sopno opnd); 164cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void doinsert(struct parse *p, sop op, sopno opnd, sopno pos); 165cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void dofwd(struct parse *p, sopno pos, sopno value); 166cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int enlarge(struct parse *p, sopno size); 167cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void stripsnug(struct parse *p, struct re_guts *g); 168cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void findmust(struct parse *p, struct re_guts *g); 169cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno pluscount(struct parse *p, struct re_guts *g); 170cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 171cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#ifdef __cplusplus 172cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes} 173cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 174cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* ========= end header generated by ./mkh ========= */ 1754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1764fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char nuls[10]; /* place to point scanner in event of error */ 1774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 1794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * macros for use with parse structure 1804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * BEWARE: these know that the parse structure is named `p' !!! 1814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 1824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define PEEK() (*p->next) 1834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define PEEK2() (*(p->next+1)) 1844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define MORE() (p->next < p->end) 1854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define MORE2() (p->next+1 < p->end) 1864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define SEE(c) (MORE() && PEEK() == (c)) 1874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 1884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 1894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 1904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define NEXT() (p->next++) 1914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define NEXT2() (p->next += 2) 1924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define NEXTn(n) (p->next += (n)) 1934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define GETNEXT() (*p->next++) 1944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define SETERROR(e) seterr(p, (e)) 195cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) 1964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 197cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define MUSTEAT(c, e) (void) (REQUIRE(MORE() && GETNEXT() == (c), e)) 1984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 199cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define EMIT(op, sopnd) doemit(p, (sop)(op), sopnd) 2004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 2014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 2024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 2034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define HERE() (p->slen) 2044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define THERE() (p->slen - 1) 2054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define THERETHERE() (p->slen - 2) 2064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define DROP(n) (p->slen -= (n)) 2074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 2084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef NDEBUG 2094fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int never = 0; /* for use in asserts; shuts lint up */ 2104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#else 2114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#define never 0 /* some <assert.h>s have bugs too */ 2124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif 2134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 214cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define MEMLIMIT 0x8000000 215cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define MEMSIZE(p) \ 216cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ((p)->ncsalloc / CHAR_BIT * (p)->g->csetsize + \ 217cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (p)->ncsalloc * sizeof(cset) + \ 218cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (p)->ssize * sizeof(sop)) 219cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#define RECLIMIT 256 220cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 2214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 2224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - regcomp - interface for parser and compilation 223cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = extern int regcomp(regex_t *, const char *, int); 224cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_BASIC 0000 225cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_EXTENDED 0001 226cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_ICASE 0002 227cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_NOSUB 0004 228cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_NEWLINE 0010 229cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_NOSPEC 0020 230cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_PEND 0040 231cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes = #define REG_DUMP 0200 2324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 2334fa7b105644222d9b35347c9d226ca8e011072ebColin Crossint /* 0 success, otherwise REG_something */ 234cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesregcomp( 235cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes regex_t *preg, 236cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *pattern, 237cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int cflags) 2384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 2394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross struct parse pa; 2404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross struct re_guts *g; 2414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross struct parse *p = &pa; 2424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int i; 2434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross size_t len; 2444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifdef REDEBUG 2454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define GOODFLAGS(f) (f) 2464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#else 2474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define GOODFLAGS(f) ((f)&~REG_DUMP) 2484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif 2494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 250cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(preg != NULL); 251cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(pattern != NULL); 252cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 2534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cflags = GOODFLAGS(cflags); 2544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 2554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(REG_INVARG); 2564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 2574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cflags®_PEND) { 2584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (preg->re_endp < pattern) 2594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(REG_INVARG); 2604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross len = preg->re_endp - pattern; 2614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else 262cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes len = strlen(pattern); 2634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 2644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* do the mallocs early so failure handling is easy */ 2654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g = (struct re_guts *)malloc(sizeof(struct re_guts) + 2664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross (NC-1)*sizeof(cat_t)); 2674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (g == NULL) 2684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(REG_ESPACE); 2694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 270cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->strip = malloc(p->ssize * sizeof(sop)); 2714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->slen = 0; 2724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->strip == NULL) { 273cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes free(g); 2744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(REG_ESPACE); 2754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 2764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 2774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* set things up */ 2784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g = g; 279cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->next = pattern; 2804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = p->next + len; 2814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->error = 0; 2824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->ncsalloc = 0; 2834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = 0; i < NPAREN; i++) { 2844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pbegin[i] = 0; 2854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pend[i] = 0; 2864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 2874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->csetsize = NC; 2884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->sets = NULL; 2894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->setbits = NULL; 2904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->ncsets = 0; 2914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->cflags = cflags; 2924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->iflags = 0; 2934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->nbol = 0; 2944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->neol = 0; 2954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->must = NULL; 2964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->mlen = 0; 2974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->nsub = 0; 2984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->ncategories = 1; /* category 0 is "everything else" */ 2994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->categories = &g->catspace[-(CHAR_MIN)]; 3004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 3014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->backrefs = 0; 3024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* do it */ 3044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OEND, 0); 3054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->firststate = THERE(); 3064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cflags®_EXTENDED) 307cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p_ere(p, OUT, 0); 3084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else if (cflags®_NOSPEC) 3094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_str(p); 3104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else 311cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p_bre(p, OUT, OUT, 0); 3124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OEND, 0); 3134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->laststate = THERE(); 3144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* tidy up loose ends and fill things in */ 3164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross categorize(p, g); 3174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross stripsnug(p, g); 3184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross findmust(p, g); 3194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->nplus = pluscount(p, g); 3204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->magic = MAGIC2; 3214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross preg->re_nsub = g->nsub; 3224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross preg->re_g = g; 3234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross preg->re_magic = MAGIC1; 3244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef REDEBUG 3254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* not debugging, so can't rely on the assert() in regexec() */ 3264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (g->iflags&BAD) 3274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ASSERT); 3284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif 3294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* win or lose, we're done */ 3314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) /* lose */ 3324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross regfree(preg); 3334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(p->error); 3344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 3354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 3374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_ere - ERE parser top level, concatenation and alternation 338cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_ere(struct parse *p, int stop, size_t reclimit); 3394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 3404fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 341cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_ere( 342cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 343cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int stop, /* character this ERE should end at */ 344cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t reclimit) 3454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 3464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char c; 347cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno prevback = 0; /* pacify gcc */ 348cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno prevfwd = 0; /* pacify gcc */ 3494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno conc; 3504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int first = 1; /* is this the first alternative? */ 3514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 352cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 353cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 354cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { 355cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->error = REG_ESPACE; 356cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return; 357cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 358cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 3594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (;;) { 3604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* do a bunch of concatenated expressions */ 3614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross conc = HERE(); 3624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && (c = PEEK()) != '|' && c != stop) 363cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p_ere_exp(p, reclimit); 3644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 3654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!EAT('|')) 3674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; /* NOTE BREAK OUT */ 3684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (first) { 3704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OCH_, conc); /* offset is wrong */ 3714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross prevfwd = conc; 3724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross prevback = conc; 3734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross first = 0; 3744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 3754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(OOR1, prevback); 3764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross prevback = THERE(); 3774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(prevfwd); /* fix previous offset */ 3784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross prevfwd = HERE(); 3794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OOR2, 0); /* offset is very wrong */ 3804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 3814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!first) { /* tail-end fixups */ 3834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(prevfwd); 3844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_CH, prevback); 3854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 3864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(!MORE() || SEE(stop)); 3884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 3894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 3904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 3914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 392cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_ere_exp(struct parse *p, size_t reclimit); 3934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 3944fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 395cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_ere_exp( 396cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 397cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t reclimit) 3984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 3994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char c; 4004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno pos; 4014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int count; 4024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int count2; 4034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno subno; 4044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int wascaret = 0; 4054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 406cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 407cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 4084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(MORE()); /* caller should have ensured this */ 4094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = GETNEXT(); 4104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 4114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross pos = HERE(); 4124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (c) { 4134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '(': 4144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EPAREN); 4154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->nsub++; 4164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross subno = p->g->nsub; 4174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (subno < NPAREN) 4184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pbegin[subno] = HERE(); 4194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OLPAREN, subno); 4204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!SEE(')')) 421cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p_ere(p, ')', reclimit); 4224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (subno < NPAREN) { 4234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pend[subno] = HERE(); 4244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(p->pend[subno] != 0); 4254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 4264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(ORPAREN, subno); 4274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross MUSTEAT(')', REG_EPAREN); 4284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#ifndef POSIX_MISTAKE 4304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case ')': /* happens only if no current unmatched ( */ 4314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* 4324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * You may ask, why the ifndef? Because I didn't notice 4334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * this until slightly too late for 1003.2, and none of the 4344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * other 1003.2 regular-expression reviewers noticed it at 4354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * all. So an unmatched ) is legal POSIX, at least until 4364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * we can get it fixed. 4374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 4384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_EPAREN); 4394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross#endif 4414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '^': 4424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OBOL, 0); 4434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->iflags |= USEBOL; 4444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->nbol++; 4454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross wascaret = 1; 4464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '$': 4484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OEOL, 0); 4494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->iflags |= USEEOL; 4504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->neol++; 4514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '|': 4534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_EMPTY); 4544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '*': 4564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '+': 4574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '?': 4584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_BADRPT); 4594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '.': 4614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->g->cflags®_NEWLINE) 4624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross nonnewline(p); 4634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else 4644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OANY, 0); 4654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '[': 4674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_bracket(p); 4684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '\\': 4704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EESCAPE); 4714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = GETNEXT(); 4724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ordinary(p, c); 4734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '{': /* okay as ordinary except if digit follows */ 475cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes REQUIRE(!MORE() || !isdigit((unsigned char)PEEK()), REG_BADRPT); 4764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* FALLTHROUGH */ 4774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: 4784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ordinary(p, c); 4794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 4804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 4814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 4824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!MORE()) 4834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 4844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = PEEK(); 4854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* we call { a repetition if followed by a digit */ 4864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!( c == '*' || c == '+' || c == '?' || 487cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) )) 4884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; /* no repetition, we're done */ 4894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 4904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 4914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(!wascaret, REG_BADRPT); 4924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (c) { 4934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '*': /* implemented as +? */ 4944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* this case does not require the (y|) trick, noKLUDGE */ 4954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OPLUS_, pos); 4964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_PLUS, pos); 4974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OQUEST_, pos); 4984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_QUEST, pos); 4994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 5004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '+': 5014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OPLUS_, pos); 5024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_PLUS, pos); 5034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 5044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '?': 5054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 5064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OCH_, pos); /* offset slightly wrong */ 5074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(OOR1, pos); /* this one's right */ 5084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(pos); /* fix the OCH_ */ 5094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OOR2, 0); /* offset very wrong... */ 5104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(THERE()); /* ...so fix it */ 5114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_CH, THERETHERE()); 5124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 5134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '{': 5144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count = p_count(p); 5154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT(',')) { 516cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (isdigit((unsigned char)PEEK())) { 5174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = p_count(p); 5184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(count <= count2, REG_BADBR); 5194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else /* single number with comma */ 5204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = INFINITY; 5214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else /* just a single number */ 5224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = count; 523cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, pos, count, count2, 0); 5244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!EAT('}')) { /* error heuristics */ 5254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && PEEK() != '}') 5264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 5274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACE); 5284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_BADBR); 5294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 5304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 5314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 5324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 5334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!MORE()) 5344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 5354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = PEEK(); 5364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!( c == '*' || c == '+' || c == '?' || 537cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (c == '{' && MORE2() && isdigit((unsigned char)PEEK2())) ) ) 5384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 5394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_BADRPT); 5404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 5414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 5424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 5434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_str - string (no metacharacters) "parser" 544cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_str(struct parse *p); 5454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 5464fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 547cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_str( 548cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 5494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 550cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 551cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 552cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 5534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EMPTY); 5544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE()) 5554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ordinary(p, GETNEXT()); 5564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 5574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 5584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 5594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_bre - BRE parser top level, anchoring and concatenation 560cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_bre(struct parse *p, int end1, \ 561cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == int end2, size_t reclimit); 5624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Giving end1 as OUT essentially eliminates the end1/end2 check. 5634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 5644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This implementation is a bit of a kludge, in that a trailing $ is first 5654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * taken as an ordinary character and then revised to be an anchor. The 5664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * only undesirable side effect is that '$' gets included as a character 5674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * category in such cases. This is fairly harmless; not worth fixing. 5684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * The amount of lookahead needed to avoid this kludge is excessive. 5694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 5704fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 571cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_bre( 572cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 5734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int end1, /* first terminating character */ 574cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int end2, /* second terminating character */ 575cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t reclimit) 5764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 577cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno start; 5784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int first = 1; /* first subexpression? */ 5794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int wasdollar = 0; 5804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 581cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 582cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 583cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { 584cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->error = REG_ESPACE; 585cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return; 586cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 587cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 588cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes start = HERE(); 589cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 5904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT('^')) { 5914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OBOL, 0); 5924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->iflags |= USEBOL; 5934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->nbol++; 5944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 5954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && !SEETWO(end1, end2)) { 596cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes wasdollar = p_simp_re(p, first, reclimit); 5974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross first = 0; 5984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 5994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (wasdollar) { /* oops, that was a trailing anchor */ 6004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross DROP(1); 6014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OEOL, 0); 6024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->iflags |= USEEOL; 6034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->neol++; 6044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 6054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 6064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 6074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 6084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 6094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 6104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 611cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); 6124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 6134fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* was the simple RE an unbackslashed $? */ 614cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_simp_re( 615cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 616cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int starordinary, /* is a leading * an ordinary character? */ 617cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t reclimit) 6184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 6194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int c; 6204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int count; 6214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int count2; 622cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno pos, i; 6234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno subno; 6244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define BACKSL (1<<CHAR_BIT) 6254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 626cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 627cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 6284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross pos = HERE(); /* repetion op, if any, covers from here */ 6294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 6304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(MORE()); /* caller should have ensured this */ 6314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = GETNEXT(); 6324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (c == '\\') { 6334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EESCAPE); 634cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes c = BACKSL | (unsigned char)GETNEXT(); 6354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 6364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (c) { 6374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '.': 6384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->g->cflags®_NEWLINE) 6394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross nonnewline(p); 6404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else 6414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OANY, 0); 6424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '[': 6444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_bracket(p); 6454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'{': 6474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_BADRPT); 6484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'(': 6504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->nsub++; 6514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross subno = p->g->nsub; 6524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (subno < NPAREN) 6534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pbegin[subno] = HERE(); 6544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OLPAREN, subno); 6554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* the MORE here is an error heuristic */ 6564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (MORE() && !SEETWO('\\', ')')) 657cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p_bre(p, '\\', ')', reclimit); 6584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (subno < NPAREN) { 6594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pend[subno] = HERE(); 6604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(p->pend[subno] != 0); 6614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 6624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(ORPAREN, subno); 6634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 6644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|')': /* should not get here -- must be user */ 6664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'}': 6674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_EPAREN); 6684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'1': 6704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'2': 6714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'3': 6724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'4': 6734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'5': 6744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'6': 6754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'7': 6764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'8': 6774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case BACKSL|'9': 6784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross i = (c&~BACKSL) - '0'; 6794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(i < NPAREN); 6804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->pend[i] != 0) { 6814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(i <= p->g->nsub); 6824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OBACK_, i); 6834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(p->pbegin[i] != 0); 6844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 6854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(OP(p->strip[p->pend[i]]) == ORPAREN); 6864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 6874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(O_BACK, i); 6884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else 6894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ESUBREG); 6904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->backrefs = 1; 6914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '*': 6934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(starordinary, REG_BADRPT); 6944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* FALLTHROUGH */ 6954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: 696cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ordinary(p, c &~ BACKSL); 6974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 6984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 6994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT('*')) { /* implemented as +? */ 7014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* this case does not require the (y|) trick, noKLUDGE */ 7024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OPLUS_, pos); 7034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_PLUS, pos); 7044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OQUEST_, pos); 7054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_QUEST, pos); 7064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else if (EATTWO('\\', '{')) { 7074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count = p_count(p); 7084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT(',')) { 709cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (MORE() && isdigit((unsigned char)PEEK())) { 7104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = p_count(p); 7114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(count <= count2, REG_BADBR); 7124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else /* single number with comma */ 7134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = INFINITY; 7144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else /* just a single number */ 7154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count2 = count; 716cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, pos, count, count2, 0); 7174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!EATTWO('\\', '}')) { /* error heuristics */ 7184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && !SEETWO('\\', '}')) 7194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 7204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACE); 7214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_BADBR); 7224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 723cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 7244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(1); 7254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); 7274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 7284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 7304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_count - parse a repetition count 731cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int p_count(struct parse *p); 7324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 7334fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* the value */ 734cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_count( 735cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 7364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 7374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int count = 0; 7384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int ndigits = 0; 7394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 740cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 741cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 742cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes while (MORE() && isdigit((unsigned char)PEEK()) && count <= DUPMAX) { 7434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross count = count*10 + (GETNEXT() - '0'); 7444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ndigits++; 7454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 7464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 7484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(count); 7494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 7504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 7524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_bracket - parse a bracketed character list 753cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_bracket(struct parse *p); 7544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 7554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Note a significant property of this code: if the allocset() did SETERROR, 7564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * no set operations are done. 7574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 7584fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 759cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_bracket( 760cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 7614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 7624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cset *cs; 7634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int invert = 0; 764cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 765cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 766cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs = allocset(p); 767cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cs == NULL) 768cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return; 7694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* Dept of Truly Sickening Special-Case Kludges */ 771cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 772cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (size_t)6) == 0) { 7734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OBOW, 0); 7744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXTn(6); 7754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 7764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 777cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 778cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (size_t)6) == 0) { 7794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OEOW, 0); 7804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXTn(6); 7814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 7824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 7834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT('^')) 7854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross invert++; /* make note to invert set at end */ 7864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT(']')) 7874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, ']'); 7884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else if (EAT('-')) 7894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, '-'); 7904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 7914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_b_term(p, cs); 7924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT('-')) 7934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, '-'); 7944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross MUSTEAT(']', REG_EBRACK); 7954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 796cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->error != 0) /* don't mess things up further */ 7974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 7984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 7994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->g->cflags®_ICASE) { 800cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ssize_t i; 8014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int ci; 8024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = p->g->csetsize - 1; i >= 0; i--) 8044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (CHIN(cs, i) && isalpha(i)) { 805cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ci = othercase((int)i); 8064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (ci != i) 8074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, ci); 8084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 8094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cs->multis != NULL) 8104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross mccase(p, cs); 8114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 8124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (invert) { 813cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ssize_t i; 8144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = p->g->csetsize - 1; i >= 0; i--) 8164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (CHIN(cs, i)) 817cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes CHsub(cs, (int)i); 8184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else 819cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes CHadd(cs, (int)i); 8204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->g->cflags®_NEWLINE) 8214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHsub(cs, '\n'); 8224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cs->multis != NULL) 8234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross mcinvert(p, cs); 8244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 8254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(cs->multis == NULL); /* xxx */ 8274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (nch(p, cs) == 1) { /* optimize singleton sets */ 8294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ordinary(p, firstch(p, cs)); 8304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross freeset(p, cs); 8314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else 8324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OANYOF, freezeset(p, cs)); 8334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 8344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 8364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_term - parse one term of a bracketed character list 837cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_term(struct parse *p, cset *cs); 8384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 8394fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 840cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_term( 841cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 842cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 8434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 8444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char c; 8454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char start, finish; 8464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int i; 8474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 848cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 849cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 850cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 8514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* classify what we've got */ 8524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch ((MORE()) ? PEEK() : '\0') { 8534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '[': 8544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = (MORE2()) ? PEEK2() : '\0'; 8554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 856cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 8574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '-': 8584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ERANGE); 8594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; /* NOTE RETURN */ 860cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 8614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: 8624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = '\0'; 8634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 8644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 8654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 8664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (c) { 8674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case ':': /* character class */ 8684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT2(); 8694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACK); 8704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = PEEK(); 8714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(c != '-' && c != ']', REG_ECTYPE); 8724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_b_cclass(p, cs); 8734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACK); 8744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 8754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 8764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case '=': /* equivalence class */ 8774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT2(); 8784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACK); 8794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = PEEK(); 8804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 8814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_b_eclass(p, cs); 8824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACK); 8834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 8844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 8854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: /* symbol, ordinary character, or range */ 8864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* xxx revision needed for multichar stuff */ 8874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross start = p_b_symbol(p); 8884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (SEE('-') && MORE2() && PEEK2() != ']') { 8894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* range */ 8904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 8914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (EAT('-')) 8924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross finish = '-'; 8934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else 8944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross finish = p_b_symbol(p); 8954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } else 8964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross finish = start; 8974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* xxx what about signed chars here... */ 8984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(start <= finish, REG_ERANGE); 8994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = start; i <= finish; i++) 9004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, i); 9014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 9024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 9034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 9044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 9064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_cclass - parse a character-class name and deal with it 907cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_cclass(struct parse *p, cset *cs); 9084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 9094fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 910cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_cclass( 911cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 912cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 9134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 914cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *sp; 91550ace4fec5e8cb5afcbc656a4556fa528adfd760David 'Digit' Turner const struct cclass *cp; 9164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross size_t len; 917cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *u; 9184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char c; 9194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 920cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 921cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 922cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 923cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sp = p->next; 924cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 925cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes while (MORE() && isalpha((unsigned char)PEEK())) 9264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 9274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross len = p->next - sp; 9284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (cp = cclasses; cp->name != NULL; cp++) 9294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 9304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 9314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cp->name == NULL) { 9324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* oops, didn't find it */ 9334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ECTYPE); 9344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 9354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 9364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross u = cp->chars; 9384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while ((c = *u++) != '\0') 9394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, c); 9404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 9414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross MCadd(p, cs, u); 9424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 9434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 9454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_eclass - parse an equivalence-class name and deal with it 946cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void p_b_eclass(struct parse *p, cset *cs); 9474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 9484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This implementation is incomplete. xxx 9494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 9504fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 951cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_eclass( 952cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 953cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 9544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 9554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char c; 9564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 957cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 958cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 959cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 9604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross c = p_b_coll_elem(p, '='); 9614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross CHadd(cs, c); 9624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 9634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 9654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 966cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char p_b_symbol(struct parse *p); 9674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 9684fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char /* value of symbol */ 969cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_symbol( 970cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 9714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 9724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char value; 9734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 974cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 975cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 9764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(MORE(), REG_EBRACK); 9774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!EATTWO('[', '.')) 9784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(GETNEXT()); 9794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* collating symbol */ 9814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross value = p_b_coll_elem(p, '.'); 9824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 9834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(value); 9844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 9854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 9864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 9874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - p_b_coll_elem - parse a collating-element name and look it up 988cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char p_b_coll_elem(struct parse *p, int endc); 9894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 9904fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic char /* value of collating element */ 991cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesp_b_coll_elem( 992cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 9934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int endc) /* name ended by endc,']' */ 9944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 995cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *sp; 99650ace4fec5e8cb5afcbc656a4556fa528adfd760David 'Digit' Turner const struct cname *cp; 997cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t len; 998cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 999cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1000cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1001cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sp = p->next; 10024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 10034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (MORE() && !SEETWO(endc, ']')) 10044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross NEXT(); 10054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!MORE()) { 10064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_EBRACK); 10074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); 10084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 10094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross len = p->next - sp; 10104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (cp = cnames; cp->name != NULL; cp++) 10114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 10124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(cp->code); /* known name */ 10134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (len == 1) 10144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(*sp); /* single character */ 10154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ECOLLATE); /* neither */ 10164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); 10174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 10184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 10194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 10204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - othercase - return the case counterpart of an alphabetic 1021cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int othercase(int ch); 10224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 1023cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int /* if no counterpart, return ch */ 1024cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesothercase( 1025cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int ch) 10264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 10274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(isalpha(ch)); 10284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (isupper(ch)) 1029cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(tolower(ch)); 10304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else if (islower(ch)) 1031cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(toupper(ch)); 10324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else /* peculiar, but could happen */ 10334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(ch); 10344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 10354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 10364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 10374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - bothcases - emit a dualcase version of a two-case character 1038cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void bothcases(struct parse *p, int ch); 10394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 10404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Boy, is this implementation ever a kludge... 10414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 10424fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1043cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesbothcases( 1044cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1045cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int ch) 10464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1047cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *oldnext; 1048cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *oldend; 10494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char bracket[3]; 10504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1051cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1052cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1053cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes oldnext = p->next; 1054cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes oldend = p->end; 1055cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 10564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(othercase(ch) != ch); /* p_bracket() would recurse */ 10574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->next = bracket; 10584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = bracket+2; 10594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[0] = ch; 10604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[1] = ']'; 10614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[2] = '\0'; 10624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_bracket(p); 10634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(p->next == bracket+2); 10644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->next = oldnext; 10654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = oldend; 10664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 10674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 10684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 10694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - ordinary - emit an ordinary character 1070cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void ordinary(struct parse *p, int ch); 10714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 10724fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1073cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesordinary( 1074cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1075cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int ch) 10764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1077cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cat_t *cap; 1078cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1079cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 10804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1081cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cap = p->g->categories; 1082cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if ((p->g->cflags®_ICASE) && isalpha((unsigned char) ch) 1083cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes && othercase((unsigned char) ch) != (unsigned char) ch) 1084cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes bothcases(p, (unsigned char) ch); 10854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross else { 1086cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes EMIT(OCHAR, (sopno)(unsigned char)ch); 1087cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cap[ch] == 0) { 1088cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(__type_fit(unsigned char, 1089cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->ncategories + 1)); 1090cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cap[ch] = (unsigned char)p->g->ncategories++; 1091cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 10924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 10934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 10944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 10954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 10964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - nonnewline - emit REG_NEWLINE version of OANY 1097cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void nonnewline(struct parse *p); 10984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 10994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Boy, is this implementation ever a kludge... 11004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 11014fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1102cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesnonnewline( 1103cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 11044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1105cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *oldnext; 1106cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *oldend; 11074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char bracket[4]; 11084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1109cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1110cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1111cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes oldnext = p->next; 1112cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes oldend = p->end; 1113cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 11144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->next = bracket; 11154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = bracket+3; 11164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[0] = '^'; 11174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[1] = '\n'; 11184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[2] = ']'; 11194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross bracket[3] = '\0'; 11204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p_bracket(p); 11214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(p->next == bracket+3); 11224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->next = oldnext; 11234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = oldend; 11244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 11254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 11264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 11274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - repeat - generate code for a bounded repetition, recursively if needed 1128cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void repeat(struct parse *p, sopno start, int from, int to, 1129cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == size_t reclimit); 11304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 11314fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1132cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesrepeat( 1133cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 11344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno start, /* operand from here to end of strip */ 11354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int from, /* repeated from this number */ 1136cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int to, /* to this number of times (maybe INFINITY) */ 1137cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t reclimit) 11384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1139cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno finish; 11404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define N 2 11414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define INF 3 11424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define REP(f, t) ((f)*8 + (t)) 11434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 11444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno copy; 11454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1146cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1147cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1148cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (reclimit++ > RECLIMIT) 1149cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->error = REG_ESPACE; 1150cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->error) 11514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 11524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1153cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes finish = HERE(); 1154cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 11554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(from <= to); 11564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 11574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (REP(MAP(from), MAP(to))) { 11584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(0, 0): /* must be user doing this */ 11594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross DROP(finish-start); /* drop the operand */ 11604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(0, 1): /* as x{1,1}? */ 11624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(0, N): /* as x{1,n}? */ 11634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(0, INF): /* as x{1,}? */ 11644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 11654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OCH_, start); /* offset is wrong... */ 1166cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, start+1, 1, to, reclimit); 11674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(OOR1, start); 11684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(start); /* ... fix it */ 11694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OOR2, 0); 11704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(THERE()); 11714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_CH, THERETHERE()); 11724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(1, 1): /* trivial case */ 11744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* done */ 11754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(1, N): /* as x?x{1,n-1} */ 11774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 11784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OCH_, start); 11794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(OOR1, start); 11804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(start); 11814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(OOR2, 0); /* offset very wrong... */ 11824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross AHEAD(THERE()); /* ...so fix it */ 11834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_CH, THERETHERE()); 11844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross copy = dupl(p, start+1, finish+1); 11854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(copy == finish+4); 1186cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, copy, 1, to-1, reclimit); 11874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(1, INF): /* as x+ */ 11894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross INSERT(OPLUS_, start); 11904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross ASTERN(O_PLUS, start); 11914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(N, N): /* as xx{m-1,n-1} */ 11934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross copy = dupl(p, start, finish); 1194cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, copy, from-1, to-1, reclimit); 11954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 11964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case REP(N, INF): /* as xx{n-1,INF} */ 11974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross copy = dupl(p, start, finish); 1198cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes repeat(p, copy, from-1, to, reclimit); 11994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 12004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: /* "can't happen" */ 12014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ASSERT); /* just in case */ 12024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 12034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 12044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 12054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 12064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 12074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - seterr - set an error condition 1208cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int seterr(struct parse *p, int e); 12094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 12104fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* useless but makes type checking happy */ 1211cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesseterr( 1212cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1213cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int e) 12144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1215cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1216cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1217cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 12184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error == 0) /* keep earliest error condition */ 12194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->error = e; 12204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->next = nuls; /* try to bring things to a halt */ 12214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->end = nuls; 12224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); /* make the return value well-defined */ 12234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 12244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 12254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 12264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - allocset - allocate a set of characters for [] 1227cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static cset *allocset(struct parse *p); 12284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 12294fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic cset * 1230cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesallocset( 1231cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p) 12324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1233cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t no; 12344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross size_t nc; 12354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross size_t nbytes; 12364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cset *cs; 1237cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t css; 1238cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 12394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1240cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 12414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1242cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes no = p->g->ncsets++; 1243cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes css = (size_t)p->g->csetsize; 1244cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (no >= p->ncsalloc) { /* need another column of space */ 12454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->ncsalloc += CHAR_BIT; 12464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross nc = p->ncsalloc; 12474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(nc % CHAR_BIT == 0); 12484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross nbytes = nc / CHAR_BIT * css; 1249cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (MEMSIZE(p) > MEMLIMIT) 1250cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes goto oomem; 1251cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->g->sets == NULL) 1252cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->sets = malloc(nc * sizeof(cset)); 1253cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes else 1254cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->sets = realloc(p->g->sets, nc * sizeof(cset)); 1255cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->g->setbits == NULL) 1256cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->setbits = malloc(nbytes); 1257cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes else { 1258cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->setbits = realloc(p->g->setbits, nbytes); 1259cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes /* xxx this isn't right if setbits is now NULL */ 1260cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (i = 0; i < no; i++) 1261cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1262cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 1263cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (p->g->sets != NULL && p->g->setbits != NULL) 1264cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (void) memset((char *)p->g->setbits + (nbytes - css), 1265cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 0, css); 1266cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes else { 1267cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesoomem: 1268cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes no = 0; 1269cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes SETERROR(REG_ESPACE); 1270cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes /* caller's responsibility not to do set ops */ 1271cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return NULL; 1272cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 12734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 12744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 12754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs = &p->g->sets[no]; 12764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1277cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->mask = 1 << (unsigned int)((no) % CHAR_BIT); 12784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs->hash = 0; 12794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs->smultis = 0; 12804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs->multis = NULL; 12814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 12824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(cs); 12834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 12844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 12854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 12864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - freeset - free a now-unused set 1287cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void freeset(struct parse *p, cset *cs); 12884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 12894fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1290cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfreeset( 1291cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1292cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 12934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1294cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1295cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *top; 1296cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t css; 1297cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1298cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1299cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 13004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1301cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes top = &p->g->sets[p->g->ncsets]; 1302cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes css = (size_t)p->g->csetsize; 1303cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1304cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (i = 0; i < css; i++) 1305cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes CHsub(cs, (int)i); 13064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cs == top-1) /* recover only the easy case */ 13074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->g->ncsets--; 13084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 13094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 13104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 13114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - freezeset - final processing on a set of characters 1312cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int freezeset(struct parse *p, cset *cs); 13134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 13144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * The main task here is merging identical sets. This is usually a waste 13154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * of time (although the hash code minimizes the overhead), but can win 13164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 13174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is done using addition rather than xor -- all ASCII [aA] sets xor to 13184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * the same value! 13194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 1320cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic sopno /* set number */ 1321cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfreezeset( 1322cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1323cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 13244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1325cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes uch h; 1326cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1327cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *top; 13284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cset *cs2; 1329cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t css; 1330cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1331cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1332cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1333cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1334cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes h = cs->hash; 1335cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes top = &p->g->sets[p->g->ncsets]; 1336cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes css = (size_t)p->g->csetsize; 13374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 13384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* look for an earlier one which is the same */ 13394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 13404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cs2->hash == h && cs2 != cs) { 13414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* maybe */ 1342cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (i = 0; i < css; i++) 13434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 13444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; /* no */ 1345cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (i == css) 13464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; /* yes */ 13474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 13484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 13494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cs2 < top) { /* found one */ 13504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross freeset(p, cs); 13514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs = cs2; 13524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 13534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1354cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return (sopno)(cs - p->g->sets); 13554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 13564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 13574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 13584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - firstch - return first character in a set (which must have at least one) 1359cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int firstch(struct parse *p, cset *cs); 13604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 13614fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* character; there is no "none" value */ 1362cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfirstch( 1363cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1364cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 13654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1366cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1367cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t css; 1368cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1369cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1370cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 13714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1372cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes css = (size_t)p->g->csetsize; 1373cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1374cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (i = 0; i < css; i++) 13754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (CHIN(cs, i)) 13764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return((char)i); 13774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(never); 13784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); /* arbitrary */ 13794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 13804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 13814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 13824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - nch - number of characters in a set 1383cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int nch(struct parse *p, cset *cs); 13844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 13854fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int 1386cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesnch( 1387cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1388cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 13894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1390cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1391cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t css; 13924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int n = 0; 13934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1394cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1395cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1396cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1397cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes css = (size_t)p->g->csetsize; 1398cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1399cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (i = 0; i < css; i++) 14004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (CHIN(cs, i)) 14014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross n++; 14024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(n); 14034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 14044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 14054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 14064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mcadd - add a collating element to a cset 1407cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcadd(struct parse *p, cset *cs, \ 1408cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == char *cp); 14094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 14104fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1411cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcadd( 1412cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1413cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs, 1414cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes const char *cp) 14154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1416cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t oldend; 1417cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1418cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1419cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1420cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cp != NULL); 1421cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1422cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes oldend = cs->smultis; 14234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 14244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cs->smultis += strlen(cp) + 1; 1425cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cs->multis == NULL) 1426cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->multis = malloc(cs->smultis); 1427cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes else 1428cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->multis = realloc(cs->multis, cs->smultis); 1429cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cs->multis == NULL) { 14304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ESPACE); 14314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 14324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 14334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1434cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (void) strcpy(cs->multis + oldend - 1, cp); 1435cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->multis[cs->smultis - 1] = '\0'; 14364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 14374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1438cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#if 0 1439cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* 1440cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcsub - subtract a collating element from a cset 1441cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcsub(cset *cs, char *cp); 1442cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */ 1443cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic void 1444cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcsub( 1445cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs, 1446cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes char *cp) 1447cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{ 1448cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes char *fp; 1449cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t len; 1450cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1451cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1452cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cp != NULL); 1453cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1454cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes fp = mcfind(cs, cp); 1455cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes len = strlen(fp); 1456cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1457cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes assert(fp != NULL); 1458cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (void) memmove(fp, fp + len + 1, 1459cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->smultis - (fp + len + 1 - cs->multis)); 1460cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->smultis -= len; 1461cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1462cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cs->smultis == 0) { 1463cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes free(cs->multis); 1464cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->multis = NULL; 1465cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return; 1466cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes } 1467cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1468cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cs->multis = realloc(cs->multis, cs->smultis); 1469cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes assert(cs->multis != NULL); 1470cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes} 1471cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1472cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* 1473cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcin - is a collating element in a cset? 1474cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int mcin(cset *cs, char *cp); 1475cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */ 1476cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int 1477cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcin( 1478cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs, 1479cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes char *cp) 1480cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{ 1481cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1482cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1483cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cp != NULL); 1484cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1485cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(mcfind(cs, cp) != NULL); 1486cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes} 1487cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1488cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes/* 1489cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes - mcfind - find a collating element in a cset 1490cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static char *mcfind(cset *cs, char *cp); 1491cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes */ 1492cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic char * 1493cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcfind( 1494cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs, 1495cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes char *cp) 1496cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes{ 1497cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes char *p; 1498cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1499cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1500cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cp != NULL); 1501cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1502cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (cs->multis == NULL) 1503cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(NULL); 1504cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1505cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (strcmp(cp, p) == 0) 1506cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(p); 1507cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return(NULL); 1508cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes} 1509cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes#endif 1510cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 15114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 15124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mcinvert - invert the list of collating elements in a cset 1513cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mcinvert(struct parse *p, cset *cs); 15144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 15154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This would have to know the set of possibilities. Implementation 15164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is deferred. 15174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 15184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* ARGSUSED */ 15194fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1520cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmcinvert( 1521cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1522cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 15234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1524cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1525cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1526cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1527cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 15284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(cs->multis == NULL); /* xxx */ 15294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 15304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 15314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 15324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - mccase - add case counterparts of the list of collating elements in a cset 1533cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void mccase(struct parse *p, cset *cs); 15344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 15354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This would have to know the set of possibilities. Implementation 15364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * is deferred. 15374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 15384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* ARGSUSED */ 15394fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1540cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesmccase( 1541cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1542cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cset *cs) 15434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1544cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1545cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1546cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(cs != NULL); 1547cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 15484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(cs->multis == NULL); /* xxx */ 15494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 15504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 15514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 15524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - isinsets - is this character in any sets? 1553cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int isinsets(struct re_guts *g, int c); 15544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 15554fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* predicate */ 1556cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesisinsets( 1557cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g, 1558cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int c) 15594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 15604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross uch *col; 1561cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1562cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t ncols; 1563cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes unsigned uc = (unsigned char)c; 1564cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1565cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1566cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1567cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (g->setbits == NULL) 1568cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return 0; 1569cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1570cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 15714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 15724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 15734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (col[uc] != 0) 15744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(1); 15754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); 15764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 15774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 15784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 15794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - samesets - are these two characters in exactly the same sets? 1580cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static int samesets(struct re_guts *g, int c1, int c2); 15814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 15824fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic int /* predicate */ 1583cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughessamesets( 1584cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g, 1585cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int c1, 1586cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes int c2) 15874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 15884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross uch *col; 1589cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t i; 1590cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes size_t ncols; 1591cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes unsigned uc1 = (unsigned char)c1; 1592cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes unsigned uc2 = (unsigned char)c2; 1593cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1594cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1595cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1596cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 15974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 15984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 15994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (col[uc1] != col[uc2]) 16004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); 16014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(1); 16024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 16034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 16054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - categorize - sort out character categories 1606cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void categorize(struct parse *p, struct re_guts *g); 16074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 16084fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1609cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughescategorize( 1610cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1611cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g) 16124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1613cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cat_t *cats; 16144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int c; 16154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int c2; 16164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cat_t cat; 16174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1618cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1619cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1620cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1621cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes cats = g->categories; 1622cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 16234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* avoid making error situations worse */ 16244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 16254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 16264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (c = CHAR_MIN; c <= CHAR_MAX; c++) 16284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cats[c] == 0 && isinsets(g, c)) { 1629cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(__type_fit(unsigned char, 1630cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes g->ncategories + 1)); 16314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cat = g->ncategories++; 16324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cats[c] = cat; 16334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (c2 = c+1; c2 <= CHAR_MAX; c2++) 16344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (cats[c2] == 0 && samesets(g, c, c2)) 16354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cats[c2] = cat; 16364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 16374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 16384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 16404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - dupl - emit a duplicate of a bunch of sops 1641cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static sopno dupl(struct parse *p, sopno start, sopno finish); 16424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 16434fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic sopno /* start of duplicate */ 1644cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdupl( 1645cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1646cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno start, /* from here */ 1647cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno finish) /* to this less one */ 16484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1649cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno ret; 16504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno len = finish - start; 16514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1652cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1653cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1654cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes ret = HERE(); 1655cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 16564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(finish >= start); 16574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (len == 0) 16584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(ret); 1659cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (!enlarge(p, p->ssize + len))/* this many unexpected additions */ 1660cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return ret; 1661cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (void)memcpy(p->strip + p->slen, p->strip + start, 1662cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes (size_t)len * sizeof(sop)); 16634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->slen += len; 16644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(ret); 16654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 16664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 16684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - doemit - emit a strip operator 1669cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void doemit(struct parse *p, sop op, size_t opnd); 16704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 16714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * It might seem better to implement this as a macro with a function as 16724fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * hard-case backup, but it's just too big and messy unless there are 16734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * some changes to the data structures. Maybe later. 16744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 16754fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1676cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdoemit( 1677cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1678cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sop op, 1679cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno opnd) 16804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1681cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1682cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 16834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* avoid making error situations worse */ 16844fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 16854fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 16864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* deal with oversize operands ("can't happen", more or less) */ 16884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(opnd < 1<<OPSHIFT); 16894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* deal with undersized strip */ 16914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->slen >= p->ssize) 1692cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ 1693cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return; 16944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* finally, it's all reduced to the easy case */ 1696cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->strip[p->slen++] = (sop)SOP(op, opnd); 16974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 16984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 16994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 17004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - doinsert - insert a sop into the strip 1701cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); 17024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 17034fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1704cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdoinsert( 1705cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1706cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sop op, 1707cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno opnd, 1708cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno pos) 17094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 17104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno sn; 17114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop s; 17124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross int i; 17134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1714cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1715cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 17164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* avoid making error situations worse */ 17174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 17184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 17194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sn = HERE(); 17214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross EMIT(op, opnd); /* do checks, ensure space */ 17224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(HERE() == sn+1); 17234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross s = p->strip[sn]; 17244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* adjust paren pointers */ 17264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(pos > 0); 17274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = 1; i < NPAREN; i++) { 17284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->pbegin[i] >= pos) { 17294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pbegin[i]++; 17304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 17314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->pend[i] >= pos) { 17324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->pend[i]++; 17334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 17344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 17354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1736cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes memmove(&p->strip[pos+1], &p->strip[pos], (HERE()-pos-1)*sizeof(sop)); 17374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->strip[pos] = s; 17384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 17394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 17414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - dofwd - complete a forward reference 1742cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void dofwd(struct parse *p, sopno pos, sop value); 17434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 17444fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1745cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesdofwd( 1746cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1747cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno pos, 1748cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno value) 17494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1750cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1751cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1752cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 17534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* avoid making error situations worse */ 17544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 17554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 17564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(value < 1<<OPSHIFT); 1758cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->strip[pos] = (sop)(OP(p->strip[pos]) | value); 17594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 17604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 17624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - enlarge - enlarge the strip 1763cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void enlarge(struct parse *p, sopno size); 17644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 1765cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstatic int 1766cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesenlarge( 1767cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1768cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno size) 17694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 17704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop *sp; 1771cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sopno osize; 1772cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1773cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 17744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->ssize >= size) 1776cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return 1; 17774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1778cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes osize = p->ssize; 1779cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->ssize = size; 1780cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (MEMSIZE(p) > MEMLIMIT) 1781cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes goto oomem; 1782cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sp = realloc(p->strip, p->ssize * sizeof(sop)); 17834fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (sp == NULL) { 1784cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesoomem: 1785cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes p->ssize = osize; 17864fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ESPACE); 1787cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return 0; 17884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 17894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross p->strip = sp; 1790cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes return 1; 17914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 17924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 17934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 17944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - stripsnug - compact the strip 1795cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void stripsnug(struct parse *p, struct re_guts *g); 17964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 17974fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1798cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesstripsnug( 1799cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1800cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g) 18014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 1802cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1803cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1804cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1805cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 18064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->nstates = p->slen; 1807cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes g->strip = realloc(p->strip, p->slen * sizeof(sop)); 18084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (g->strip == NULL) { 18094fa7b105644222d9b35347c9d226ca8e011072ebColin Cross SETERROR(REG_ESPACE); 18104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->strip = p->strip; 18114fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 18124fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 18134fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 18144fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 18154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - findmust - fill in must and mlen with longest mandatory literal string 1816cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static void findmust(struct parse *p, struct re_guts *g); 18174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 18184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * This algorithm could do fancy things like analyzing the operands of | 18194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * for common subsequences. Someday. This code is simple and finds most 18204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * of the interesting cases. 18214fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * 18224fa7b105644222d9b35347c9d226ca8e011072ebColin Cross * Note that must and mlen got initialized during setup. 18234fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 18244fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic void 1825cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughesfindmust( 1826cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1827cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g) 18284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 18294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop *scan; 1830cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sop *start = NULL; 1831cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes sop *newstart = NULL; 18324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno newlen; 18334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop s; 18344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross char *cp; 18354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno i; 18364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1837cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1838cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1839cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 18404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* avoid making error situations worse */ 18414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 18424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 18434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 18444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* find the longest OCHAR sequence in strip */ 18454fa7b105644222d9b35347c9d226ca8e011072ebColin Cross newlen = 0; 18464fa7b105644222d9b35347c9d226ca8e011072ebColin Cross scan = g->strip + 1; 18474fa7b105644222d9b35347c9d226ca8e011072ebColin Cross do { 18484fa7b105644222d9b35347c9d226ca8e011072ebColin Cross s = *scan++; 18494fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (OP(s)) { 18504fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OCHAR: /* sequence member */ 18514fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (newlen == 0) /* new sequence */ 18524fa7b105644222d9b35347c9d226ca8e011072ebColin Cross newstart = scan - 1; 18534fa7b105644222d9b35347c9d226ca8e011072ebColin Cross newlen++; 18544fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 18554fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OPLUS_: /* things that don't break one */ 18564fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OLPAREN: 18574fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case ORPAREN: 18584fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 18594fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OQUEST_: /* things that must be skipped */ 18604fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OCH_: 18614fa7b105644222d9b35347c9d226ca8e011072ebColin Cross scan--; 18624fa7b105644222d9b35347c9d226ca8e011072ebColin Cross do { 18634fa7b105644222d9b35347c9d226ca8e011072ebColin Cross scan += OPND(s); 18644fa7b105644222d9b35347c9d226ca8e011072ebColin Cross s = *scan; 18654fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* assert() interferes w debug printouts */ 18664fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (OP(s) != O_QUEST && OP(s) != O_CH && 18674fa7b105644222d9b35347c9d226ca8e011072ebColin Cross OP(s) != OOR2) { 18684fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->iflags |= BAD; 18694fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 18704fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 18714fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } while (OP(s) != O_QUEST && OP(s) != O_CH); 1872cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes /* FALLTHROUGH */ 18734fa7b105644222d9b35347c9d226ca8e011072ebColin Cross default: /* things that break a sequence */ 18744fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (newlen > g->mlen) { /* ends one */ 18754fa7b105644222d9b35347c9d226ca8e011072ebColin Cross start = newstart; 18764fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->mlen = newlen; 18774fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 18784fa7b105644222d9b35347c9d226ca8e011072ebColin Cross newlen = 0; 18794fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 18804fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 18814fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } while (OP(s) != OEND); 18824fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1883cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (start == NULL) 1884cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes g->mlen = 0; 1885cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 1886cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes if (g->mlen == 0) /* there isn't one */ 18874fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 18884fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 18894fa7b105644222d9b35347c9d226ca8e011072ebColin Cross /* turn it into a character string */ 18904fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->must = malloc((size_t)g->mlen + 1); 18914fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (g->must == NULL) { /* argh; just forget it */ 18924fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->mlen = 0; 18934fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return; 18944fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 18954fa7b105644222d9b35347c9d226ca8e011072ebColin Cross cp = g->must; 18964fa7b105644222d9b35347c9d226ca8e011072ebColin Cross scan = start; 18974fa7b105644222d9b35347c9d226ca8e011072ebColin Cross for (i = g->mlen; i > 0; i--) { 18984fa7b105644222d9b35347c9d226ca8e011072ebColin Cross while (OP(s = *scan++) != OCHAR) 18994fa7b105644222d9b35347c9d226ca8e011072ebColin Cross continue; 19004fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(cp < g->must + g->mlen); 19014fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *cp++ = (char)OPND(s); 19024fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 19034fa7b105644222d9b35347c9d226ca8e011072ebColin Cross assert(cp == g->must + g->mlen); 19044fa7b105644222d9b35347c9d226ca8e011072ebColin Cross *cp++ = '\0'; /* just on general principles */ 19054fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 19064fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 19074fa7b105644222d9b35347c9d226ca8e011072ebColin Cross/* 19084fa7b105644222d9b35347c9d226ca8e011072ebColin Cross - pluscount - count + nesting 1909cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes == static sopno pluscount(struct parse *p, struct re_guts *g); 19104fa7b105644222d9b35347c9d226ca8e011072ebColin Cross */ 19114fa7b105644222d9b35347c9d226ca8e011072ebColin Crossstatic sopno /* nesting depth */ 1912cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughespluscount( 1913cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct parse *p, 1914cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes struct re_guts *g) 19154fa7b105644222d9b35347c9d226ca8e011072ebColin Cross{ 19164fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop *scan; 19174fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sop s; 19184fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno plusnest = 0; 19194fa7b105644222d9b35347c9d226ca8e011072ebColin Cross sopno maxnest = 0; 19204fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 1921cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(p != NULL); 1922cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes _DIAGASSERT(g != NULL); 1923cc213f871bf4c5329eb5eb7a80a0ce9d4a880af8Elliott Hughes 19244fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (p->error != 0) 19254fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(0); /* there may not be an OEND */ 19264fa7b105644222d9b35347c9d226ca8e011072ebColin Cross 19274fa7b105644222d9b35347c9d226ca8e011072ebColin Cross scan = g->strip + 1; 19284fa7b105644222d9b35347c9d226ca8e011072ebColin Cross do { 19294fa7b105644222d9b35347c9d226ca8e011072ebColin Cross s = *scan++; 19304fa7b105644222d9b35347c9d226ca8e011072ebColin Cross switch (OP(s)) { 19314fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case OPLUS_: 19324fa7b105644222d9b35347c9d226ca8e011072ebColin Cross plusnest++; 19334fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 19344fa7b105644222d9b35347c9d226ca8e011072ebColin Cross case O_PLUS: 19354fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (plusnest > maxnest) 19364fa7b105644222d9b35347c9d226ca8e011072ebColin Cross maxnest = plusnest; 19374fa7b105644222d9b35347c9d226ca8e011072ebColin Cross plusnest--; 19384fa7b105644222d9b35347c9d226ca8e011072ebColin Cross break; 19394fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } 19404fa7b105644222d9b35347c9d226ca8e011072ebColin Cross } while (OP(s) != OEND); 19414fa7b105644222d9b35347c9d226ca8e011072ebColin Cross if (plusnest != 0) 19424fa7b105644222d9b35347c9d226ca8e011072ebColin Cross g->iflags |= BAD; 19434fa7b105644222d9b35347c9d226ca8e011072ebColin Cross return(maxnest); 19444fa7b105644222d9b35347c9d226ca8e011072ebColin Cross} 1945