12df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/*- 22df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This code is derived from OpenBSD's libc/regex, original license follows: 32df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 42df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Copyright (c) 1992, 1993, 1994 Henry Spencer. 52df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Copyright (c) 1992, 1993, 1994 62df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * The Regents of the University of California. All rights reserved. 72df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 82df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This code is derived from software contributed to Berkeley by 92df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Henry Spencer. 102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Redistribution and use in source and binary forms, with or without 122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * modification, are permitted provided that the following conditions 132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * are met: 142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 1. Redistributions of source code must retain the above copyright 152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * notice, this list of conditions and the following disclaimer. 162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 2. Redistributions in binary form must reproduce the above copyright 172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * notice, this list of conditions and the following disclaimer in the 182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * documentation and/or other materials provided with the distribution. 192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 3. Neither the name of the University nor the names of its contributors 202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * may be used to endorse or promote products derived from this software 212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * without specific prior written permission. 222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * SUCH DAMAGE. 342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <sys/types.h> 392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <stdio.h> 402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <string.h> 412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <ctype.h> 422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <limits.h> 432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <stdlib.h> 442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "regex_impl.h" 452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "regutils.h" 472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "regex2.h" 482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "regcclass.h" 502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "regcname.h" 512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include "llvm/Config/config.h" 532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#if HAVE_STDINT_H 542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#include <stdint.h> 552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#else 562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* Pessimistically bound memory use */ 572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define SIZE_MAX UINT_MAX 582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * parse structure, passed up and down to avoid global variables and 622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * other clumsinesses 632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstruct parse { 652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *next; /* next character in RE */ 662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *end; /* end of string (-> NUL normally) */ 672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int error; /* has an error been seen? */ 682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *strip; /* malloced strip */ 692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno ssize; /* malloced strip size (allocated) */ 702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno slen; /* malloced strip length (used) */ 712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int ncsalloc; /* number of csets allocated */ 722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct re_guts *g; 732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define NPAREN 10 /* we need to remember () 1-9 for back refs */ 742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno pend[NPAREN]; /* -> ) ([0] unused) */ 762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens}; 772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_ere(struct parse *, int); 792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_ere_exp(struct parse *); 802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_str(struct parse *); 812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_bre(struct parse *, int, int); 822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int p_simp_re(struct parse *, int); 832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int p_count(struct parse *); 842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_bracket(struct parse *); 852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_b_term(struct parse *, cset *); 862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_b_cclass(struct parse *, cset *); 872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void p_b_eclass(struct parse *, cset *); 882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char p_b_symbol(struct parse *); 892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char p_b_coll_elem(struct parse *, int); 902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char othercase(int); 912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void bothcases(struct parse *, int); 922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void ordinary(struct parse *, int); 932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void nonnewline(struct parse *); 942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void repeat(struct parse *, sopno, int, int); 952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int seterr(struct parse *, int); 962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic cset *allocset(struct parse *); 972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void freeset(struct parse *, cset *); 982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int freezeset(struct parse *, cset *); 992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int firstch(struct parse *, cset *); 1002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int nch(struct parse *, cset *); 1012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void mcadd(struct parse *, cset *, const char *); 1022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void mcinvert(struct parse *, cset *); 1032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void mccase(struct parse *, cset *); 1042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int isinsets(struct re_guts *, int); 1052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int samesets(struct re_guts *, int, int); 1062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void categorize(struct parse *, struct re_guts *); 1072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic sopno dupl(struct parse *, sopno, sopno); 1082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void doemit(struct parse *, sop, size_t); 1092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void doinsert(struct parse *, sop, size_t, sopno); 1102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void dofwd(struct parse *, sopno, sop); 1112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void enlarge(struct parse *, sopno); 1122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void stripsnug(struct parse *, struct re_guts *); 1132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void findmust(struct parse *, struct re_guts *); 1142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic sopno pluscount(struct parse *, struct re_guts *); 1152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char nuls[10]; /* place to point scanner in event of error */ 1172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 1192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * macros for use with parse structure 1202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * BEWARE: these know that the parse structure is named `p' !!! 1212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 1222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define PEEK() (*p->next) 1232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define PEEK2() (*(p->next+1)) 1242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define MORE() (p->next < p->end) 1252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define MORE2() (p->next+1 < p->end) 1262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define SEE(c) (MORE() && PEEK() == (c)) 1272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 1282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 1292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 1302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define NEXT() (p->next++) 1312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define NEXT2() (p->next += 2) 1322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define NEXTn(n) (p->next += (n)) 1332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define GETNEXT() (*p->next++) 1342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define SETERROR(e) seterr(p, (e)) 1352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define REQUIRE(co, e) (void)((co) || SETERROR(e)) 1362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 1372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 1382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 1392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 1402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 1412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 1422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 1432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define HERE() (p->slen) 1442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define THERE() (p->slen - 1) 1452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define THERETHERE() (p->slen - 2) 1462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define DROP(n) (p->slen -= (n)) 1472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#ifdef _POSIX2_RE_DUP_MAX 1492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define DUPMAX _POSIX2_RE_DUP_MAX 1502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#else 1512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define DUPMAX 255 1522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 1532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define INFINITY (DUPMAX + 1) 1542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#ifndef NDEBUG 1562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int never = 0; /* for use in asserts; shuts lint up */ 1572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#else 1582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#define never 0 /* some <assert.h>s have bugs too */ 1592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 1602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 1622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - llvm_regcomp - interface for parser and compilation 1632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 1642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensint /* 0 success, otherwise REG_something */ 1652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensllvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) 1662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 1672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct parse pa; 1682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct re_guts *g; 1692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct parse *p = &pa; 1702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 1712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t len; 1722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#ifdef REDEBUG 1732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define GOODFLAGS(f) (f) 1742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#else 1752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define GOODFLAGS(f) ((f)&~REG_DUMP) 1762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 1772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cflags = GOODFLAGS(cflags); 1792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 1802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(REG_INVARG); 1812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cflags®_PEND) { 1832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (preg->re_endp < pattern) 1842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(REG_INVARG); 1852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens len = preg->re_endp - pattern; 1862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else 1872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens len = strlen((const char *)pattern); 1882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 1892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* do the mallocs early so failure handling is easy */ 1902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g = (struct re_guts *)malloc(sizeof(struct re_guts) + 1912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (NC-1)*sizeof(cat_t)); 1922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (g == NULL) 1932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(REG_ESPACE); 1942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 1952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->strip = (sop *)calloc(p->ssize, sizeof(sop)); 1962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->slen = 0; 1972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->strip == NULL) { 1982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens free((char *)g); 1992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(REG_ESPACE); 2002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 2012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* set things up */ 2032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g = g; 2042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = (char *)pattern; /* convenience; we do not modify it */ 2052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = p->next + len; 2062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->error = 0; 2072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->ncsalloc = 0; 2082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < NPAREN; i++) { 2092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pbegin[i] = 0; 2102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pend[i] = 0; 2112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 2122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->csetsize = NC; 2132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->sets = NULL; 2142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->setbits = NULL; 2152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->ncsets = 0; 2162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->cflags = cflags; 2172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->iflags = 0; 2182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->nbol = 0; 2192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->neol = 0; 2202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->must = NULL; 2212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->mlen = 0; 2222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->nsub = 0; 2232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->ncategories = 1; /* category 0 is "everything else" */ 2242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->categories = &g->catspace[-(CHAR_MIN)]; 2252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 2262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->backrefs = 0; 2272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* do it */ 2292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OEND, 0); 2302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->firststate = THERE(); 2312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cflags®_EXTENDED) 2322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_ere(p, OUT); 2332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else if (cflags®_NOSPEC) 2342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_str(p); 2352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else 2362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bre(p, OUT, OUT); 2372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OEND, 0); 2382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->laststate = THERE(); 2392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* tidy up loose ends and fill things in */ 2412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens categorize(p, g); 2422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens stripsnug(p, g); 2432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens findmust(p, g); 2442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->nplus = pluscount(p, g); 2452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->magic = MAGIC2; 2462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens preg->re_nsub = g->nsub; 2472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens preg->re_g = g; 2482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens preg->re_magic = MAGIC1; 2492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#ifndef REDEBUG 2502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* not debugging, so can't rely on the assert() in llvm_regexec() */ 2512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (g->iflags®EX_BAD) 2522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ASSERT); 2532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 2542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* win or lose, we're done */ 2562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) /* lose */ 2572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens llvm_regfree(preg); 2582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(p->error); 2592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 2602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 2622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_ere - ERE parser top level, concatenation and alternation 2632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 2642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 2652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_ere(struct parse *p, int stop) /* character this ERE should end at */ 2662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 2672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char c; 2682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno prevback = 0; 2692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno prevfwd = 0; 2702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno conc; 2712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int first = 1; /* is this the first alternative? */ 2722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (;;) { 2742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* do a bunch of concatenated expressions */ 2752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens conc = HERE(); 2762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && (c = PEEK()) != '|' && c != stop) 2772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_ere_exp(p); 2782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 2792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!EAT('|')) 2812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; /* NOTE BREAK OUT */ 2822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (first) { 2842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OCH_, conc); /* offset is wrong */ 2852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens prevfwd = conc; 2862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens prevback = conc; 2872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens first = 0; 2882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 2892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(OOR1, prevback); 2902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens prevback = THERE(); 2912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(prevfwd); /* fix previous offset */ 2922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens prevfwd = HERE(); 2932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OOR2, 0); /* offset is very wrong */ 2942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 2952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 2962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!first) { /* tail-end fixups */ 2972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(prevfwd); 2982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_CH, prevback); 2992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 3002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 3012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(!MORE() || SEE(stop)); 3022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 3032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 3042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 3052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 3062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 3072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 3082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_ere_exp(struct parse *p) 3092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 3102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char c; 3112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno pos; 3122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int count; 3132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int count2; 3142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int backrefnum; 3152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno subno; 3162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int wascaret = 0; 3172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 3182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(MORE()); /* caller should have ensured this */ 3192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = GETNEXT(); 3202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 3212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens pos = HERE(); 3222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (c) { 3232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '(': 3242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EPAREN); 3252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->nsub++; 3262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens subno = p->g->nsub; 3272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (subno < NPAREN) 3282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pbegin[subno] = HERE(); 3292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OLPAREN, subno); 3302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!SEE(')')) 3312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_ere(p, ')'); 3322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (subno < NPAREN) { 3332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pend[subno] = HERE(); 3342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->pend[subno] != 0); 3352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 3362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(ORPAREN, subno); 3372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens MUSTEAT(')', REG_EPAREN); 3382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#ifndef POSIX_MISTAKE 3402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case ')': /* happens only if no current unmatched ( */ 3412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* 3422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * You may ask, why the ifndef? Because I didn't notice 3432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * this until slightly too late for 1003.2, and none of the 3442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * other 1003.2 regular-expression reviewers noticed it at 3452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * all. So an unmatched ) is legal POSIX, at least until 3462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * we can get it fixed. 3472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 3482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_EPAREN); 3492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens#endif 3512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '^': 3522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OBOL, 0); 3532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->iflags |= USEBOL; 3542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->nbol++; 3552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens wascaret = 1; 3562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '$': 3582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OEOL, 0); 3592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->iflags |= USEEOL; 3602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->neol++; 3612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '|': 3632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_EMPTY); 3642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '*': 3662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '+': 3672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '?': 3682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_BADRPT); 3692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '.': 3712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->g->cflags®_NEWLINE) 3722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens nonnewline(p); 3732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else 3742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OANY, 0); 3752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '[': 3772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bracket(p); 3782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '\\': 3802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EESCAPE); 3812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = GETNEXT(); 3822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (c >= '1' && c <= '9') { 3832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* \[0-9] is taken to be a back-reference to a previously specified 3842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * matching group. backrefnum will hold the number. The matching 3852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * group must exist (i.e. if \4 is found there must have been at 3862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * least 4 matching groups specified in the pattern previously). 3872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 3882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens backrefnum = c - '0'; 3892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->pend[backrefnum] == 0) { 3902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESUBREG); 3912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 3922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 3932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 3942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* Make sure everything checks out and emit the sequence 3952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * that marks a back-reference to the parse structure. 3962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 3972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(backrefnum <= p->g->nsub); 3982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OBACK_, backrefnum); 3992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->pbegin[backrefnum] != 0); 4002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN); 4012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN); 4022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]); 4032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(O_BACK, backrefnum); 4042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->backrefs = 1; 4052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else { 4062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* Other chars are simply themselves when escaped with a backslash. 4072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 4082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ordinary(p, c); 4092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 4102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '{': /* okay as ordinary except if digit follows */ 4122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 4132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* FALLTHROUGH */ 4142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: 4152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ordinary(p, c); 4162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 4182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 4192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!MORE()) 4202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 4212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = PEEK(); 4222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* we call { a repetition if followed by a digit */ 4232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!( c == '*' || c == '+' || c == '?' || 4242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 4252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; /* no repetition, we're done */ 4262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 4272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 4282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(!wascaret, REG_BADRPT); 4292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (c) { 4302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '*': /* implemented as +? */ 4312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* this case does not require the (y|) trick, noKLUDGE */ 4322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OPLUS_, pos); 4332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_PLUS, pos); 4342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OQUEST_, pos); 4352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_QUEST, pos); 4362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '+': 4382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OPLUS_, pos); 4392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_PLUS, pos); 4402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '?': 4422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 4432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OCH_, pos); /* offset slightly wrong */ 4442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(OOR1, pos); /* this one's right */ 4452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(pos); /* fix the OCH_ */ 4462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OOR2, 0); /* offset very wrong... */ 4472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(THERE()); /* ...so fix it */ 4482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_CH, THERETHERE()); 4492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '{': 4512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count = p_count(p); 4522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT(',')) { 4532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (isdigit((uch)PEEK())) { 4542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = p_count(p); 4552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(count <= count2, REG_BADBR); 4562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else /* single number with comma */ 4572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = INFINITY; 4582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else /* just a single number */ 4592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = count; 4602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, pos, count, count2); 4612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!EAT('}')) { /* error heuristics */ 4622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && PEEK() != '}') 4632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 4642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACE); 4652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_BADBR); 4662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 4672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 4682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 4692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 4702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!MORE()) 4712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 4722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = PEEK(); 4732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!( c == '*' || c == '+' || c == '?' || 4742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 4752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 4762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_BADRPT); 4772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 4782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 4792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 4802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_str - string (no metacharacters) "parser" 4812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 4822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 4832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_str(struct parse *p) 4842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 4852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EMPTY); 4862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE()) 4872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ordinary(p, GETNEXT()); 4882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 4892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 4902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 4912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_bre - BRE parser top level, anchoring and concatenation 4922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Giving end1 as OUT essentially eliminates the end1/end2 check. 4932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 4942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This implementation is a bit of a kludge, in that a trailing $ is first 4952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * taken as an ordinary character and then revised to be an anchor. The 4962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * only undesirable side effect is that '$' gets included as a character 4972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * category in such cases. This is fairly harmless; not worth fixing. 4982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * The amount of lookahead needed to avoid this kludge is excessive. 4992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 5002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 5012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_bre(struct parse *p, 5022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int end1, /* first terminating character */ 5032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int end2) /* second terminating character */ 5042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 5052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno start = HERE(); 5062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int first = 1; /* first subexpression? */ 5072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int wasdollar = 0; 5082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 5092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT('^')) { 5102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OBOL, 0); 5112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->iflags |= USEBOL; 5122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->nbol++; 5132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 5142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && !SEETWO(end1, end2)) { 5152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens wasdollar = p_simp_re(p, first); 5162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens first = 0; 5172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 5182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (wasdollar) { /* oops, that was a trailing anchor */ 5192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens DROP(1); 5202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OEOL, 0); 5212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->iflags |= USEEOL; 5222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->neol++; 5232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 5242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 5252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 5262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 5272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 5282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 5292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 5302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 5312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* was the simple RE an unbackslashed $? */ 5322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_simp_re(struct parse *p, 5332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int starordinary) /* is a leading * an ordinary character? */ 5342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 5352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int c; 5362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int count; 5372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int count2; 5382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno pos; 5392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 5402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno subno; 5412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define BACKSL (1<<CHAR_BIT) 5422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 5432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens pos = HERE(); /* repetition op, if any, covers from here */ 5442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 5452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(MORE()); /* caller should have ensured this */ 5462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = GETNEXT(); 5472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (c == '\\') { 5482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EESCAPE); 5492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = BACKSL | GETNEXT(); 5502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 5512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (c) { 5522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '.': 5532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->g->cflags®_NEWLINE) 5542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens nonnewline(p); 5552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else 5562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OANY, 0); 5572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 5582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '[': 5592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bracket(p); 5602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 5612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'{': 5622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_BADRPT); 5632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 5642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'(': 5652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->nsub++; 5662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens subno = p->g->nsub; 5672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (subno < NPAREN) 5682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pbegin[subno] = HERE(); 5692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OLPAREN, subno); 5702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* the MORE here is an error heuristic */ 5712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (MORE() && !SEETWO('\\', ')')) 5722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bre(p, '\\', ')'); 5732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (subno < NPAREN) { 5742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pend[subno] = HERE(); 5752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->pend[subno] != 0); 5762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 5772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(ORPAREN, subno); 5782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 5792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 5802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|')': /* should not get here -- must be user */ 5812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'}': 5822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_EPAREN); 5832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 5842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'1': 5852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'2': 5862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'3': 5872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'4': 5882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'5': 5892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'6': 5902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'7': 5912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'8': 5922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case BACKSL|'9': 5932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens i = (c&~BACKSL) - '0'; 5942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(i < NPAREN); 5952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->pend[i] != 0) { 5962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(i <= p->g->nsub); 5972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OBACK_, i); 5982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->pbegin[i] != 0); 5992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 6002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(OP(p->strip[p->pend[i]]) == ORPAREN); 6012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 6022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(O_BACK, i); 6032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else 6042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESUBREG); 6052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->backrefs = 1; 6062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 6072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '*': 6082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(starordinary, REG_BADRPT); 6092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* FALLTHROUGH */ 6102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: 6112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ordinary(p, (char)c); 6122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 6132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT('*')) { /* implemented as +? */ 6162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* this case does not require the (y|) trick, noKLUDGE */ 6172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OPLUS_, pos); 6182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_PLUS, pos); 6192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OQUEST_, pos); 6202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_QUEST, pos); 6212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else if (EATTWO('\\', '{')) { 6222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count = p_count(p); 6232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT(',')) { 6242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (MORE() && isdigit((uch)PEEK())) { 6252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = p_count(p); 6262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(count <= count2, REG_BADBR); 6272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else /* single number with comma */ 6282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = INFINITY; 6292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else /* just a single number */ 6302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count2 = count; 6312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, pos, count, count2); 6322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!EATTWO('\\', '}')) { /* error heuristics */ 6332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && !SEETWO('\\', '}')) 6342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 6352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACE); 6362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_BADBR); 6372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else if (c == '$') /* $ (but not \$) ends it */ 6392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(1); 6402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); 6422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 6432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 6452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_count - parse a repetition count 6462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 6472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* the value */ 6482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_count(struct parse *p) 6492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 6502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int count = 0; 6512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int ndigits = 0; 6522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 6542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens count = count*10 + (GETNEXT() - '0'); 6552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ndigits++; 6562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 6592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(count); 6602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 6612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 6632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_bracket - parse a bracketed character list 6642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 6652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Note a significant property of this code: if the allocset() did SETERROR, 6662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * no set operations are done. 6672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 6682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 6692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_bracket(struct parse *p) 6702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 6712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cset *cs; 6722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int invert = 0; 6732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* Dept of Truly Sickening Special-Case Kludges */ 6752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 6762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OBOW, 0); 6772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXTn(6); 6782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 6792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 6812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OEOW, 0); 6822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXTn(6); 6832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 6842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if ((cs = allocset(p)) == NULL) { 6872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* allocset did set error status in p */ 6882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 6892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 6902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 6912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT('^')) 6922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens invert++; /* make note to invert set at end */ 6932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT(']')) 6942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, ']'); 6952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else if (EAT('-')) 6962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, '-'); 6972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 6982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_b_term(p, cs); 6992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT('-')) 7002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, '-'); 7012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens MUSTEAT(']', REG_EBRACK); 7022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) { /* don't mess things up further */ 7042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens freeset(p, cs); 7052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 7062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 7072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->g->cflags®_ICASE) { 7092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 7102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int ci; 7112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = p->g->csetsize - 1; i >= 0; i--) 7132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (CHIN(cs, i) && isalpha(i)) { 7142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ci = othercase(i); 7152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (ci != i) 7162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, ci); 7172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 7182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs->multis != NULL) 7192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens mccase(p, cs); 7202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 7212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (invert) { 7222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 7232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = p->g->csetsize - 1; i >= 0; i--) 7252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (CHIN(cs, i)) 7262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHsub(cs, i); 7272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else 7282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, i); 7292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->g->cflags®_NEWLINE) 7302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHsub(cs, '\n'); 7312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs->multis != NULL) 7322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens mcinvert(p, cs); 7332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 7342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(cs->multis == NULL); /* xxx */ 7362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (nch(p, cs) == 1) { /* optimize singleton sets */ 7382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ordinary(p, firstch(p, cs)); 7392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens freeset(p, cs); 7402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else 7412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OANYOF, freezeset(p, cs)); 7422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 7432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 7452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_b_term - parse one term of a bracketed character list 7462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 7472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 7482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_b_term(struct parse *p, cset *cs) 7492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 7502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char c; 7512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char start, finish; 7522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 7532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* classify what we've got */ 7552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch ((MORE()) ? PEEK() : '\0') { 7562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '[': 7572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = (MORE2()) ? PEEK2() : '\0'; 7582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 7592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '-': 7602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ERANGE); 7612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; /* NOTE RETURN */ 7622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 7632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: 7642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = '\0'; 7652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 7662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 7672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 7682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (c) { 7692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case ':': /* character class */ 7702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT2(); 7712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACK); 7722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = PEEK(); 7732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(c != '-' && c != ']', REG_ECTYPE); 7742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_b_cclass(p, cs); 7752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACK); 7762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 7772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 7782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case '=': /* equivalence class */ 7792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT2(); 7802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACK); 7812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = PEEK(); 7822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 7832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_b_eclass(p, cs); 7842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACK); 7852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 7862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 7872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: /* symbol, ordinary character, or range */ 7882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* xxx revision needed for multichar stuff */ 7892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens start = p_b_symbol(p); 7902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (SEE('-') && MORE2() && PEEK2() != ']') { 7912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* range */ 7922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 7932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (EAT('-')) 7942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens finish = '-'; 7952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else 7962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens finish = p_b_symbol(p); 7972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } else 7982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens finish = start; 7992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* xxx what about signed chars here... */ 8002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(start <= finish, REG_ERANGE); 8012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = start; i <= finish; i++) 8022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, i); 8032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 8042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 8052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 8062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 8082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_b_cclass - parse a character-class name and deal with it 8092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 8102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 8112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_b_cclass(struct parse *p, cset *cs) 8122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 8132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *sp = p->next; 8142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct cclass *cp; 8152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t len; 8162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens const char *u; 8172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char c; 8182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && isalpha((uch)PEEK())) 8202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 8212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens len = p->next - sp; 8222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (cp = cclasses; cp->name != NULL; cp++) 8232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 8242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 8252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cp->name == NULL) { 8262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* oops, didn't find it */ 8272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ECTYPE); 8282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 8292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 8302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens u = cp->chars; 8322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while ((c = *u++) != '\0') 8332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, c); 8342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 8352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens MCadd(p, cs, u); 8362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 8372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 8392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_b_eclass - parse an equivalence-class name and deal with it 8402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 8412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This implementation is incomplete. xxx 8422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 8432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 8442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_b_eclass(struct parse *p, cset *cs) 8452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 8462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char c; 8472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens c = p_b_coll_elem(p, '='); 8492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHadd(cs, c); 8502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 8512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 8532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 8542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 8552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char /* value of symbol */ 8562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_b_symbol(struct parse *p) 8572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 8582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char value; 8592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(MORE(), REG_EBRACK); 8612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!EATTWO('[', '.')) 8622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(GETNEXT()); 8632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* collating symbol */ 8652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens value = p_b_coll_elem(p, '.'); 8662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 8672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(value); 8682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 8692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 8712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - p_b_coll_elem - parse a collating-element name and look it up 8722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 8732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char /* value of collating element */ 8742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensp_b_coll_elem(struct parse *p, 8752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int endc) /* name ended by endc,']' */ 8762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 8772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *sp = p->next; 8782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens struct cname *cp; 8792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int len; 8802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (MORE() && !SEETWO(endc, ']')) 8822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens NEXT(); 8832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!MORE()) { 8842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_EBRACK); 8852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); 8862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 8872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens len = p->next - sp; 8882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (cp = cnames; cp->name != NULL; cp++) 8892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 8902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(cp->code); /* known name */ 8912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (len == 1) 8922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(*sp); /* single character */ 8932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ECOLLATE); /* neither */ 8942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); 8952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 8962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 8972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 8982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - othercase - return the case counterpart of an alphabetic 8992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 9002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic char /* if no counterpart, return ch */ 9012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensothercase(int ch) 9022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 9032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ch = (uch)ch; 9042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(isalpha(ch)); 9052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (isupper(ch)) 9062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return ((uch)tolower(ch)); 9072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else if (islower(ch)) 9082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return ((uch)toupper(ch)); 9092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else /* peculiar, but could happen */ 9102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(ch); 9112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 9122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 9142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - bothcases - emit a dualcase version of a two-case character 9152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 9162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Boy, is this implementation ever a kludge... 9172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 9182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 9192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensbothcases(struct parse *p, int ch) 9202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 9212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *oldnext = p->next; 9222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *oldend = p->end; 9232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char bracket[3]; 9242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ch = (uch)ch; 9262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(othercase(ch) != ch); /* p_bracket() would recurse */ 9272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = bracket; 9282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = bracket+2; 9292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[0] = ch; 9302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[1] = ']'; 9312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[2] = '\0'; 9322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bracket(p); 9332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->next == bracket+2); 9342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = oldnext; 9352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = oldend; 9362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 9372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 9392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - ordinary - emit an ordinary character 9402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 9412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 9422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensordinary(struct parse *p, int ch) 9432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 9442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cat_t *cap = p->g->categories; 9452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 9472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bothcases(p, ch); 9482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens else { 9492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OCHAR, (uch)ch); 9502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cap[ch] == 0) 9512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cap[ch] = p->g->ncategories++; 9522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 9532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 9542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 9562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - nonnewline - emit REG_NEWLINE version of OANY 9572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 9582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Boy, is this implementation ever a kludge... 9592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 9602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 9612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensnonnewline(struct parse *p) 9622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 9632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *oldnext = p->next; 9642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *oldend = p->end; 9652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char bracket[4]; 9662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = bracket; 9682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = bracket+3; 9692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[0] = '^'; 9702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[1] = '\n'; 9712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[2] = ']'; 9722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens bracket[3] = '\0'; 9732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p_bracket(p); 9742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->next == bracket+3); 9752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = oldnext; 9762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = oldend; 9772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 9782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 9802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - repeat - generate code for a bounded repetition, recursively if needed 9812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 9822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 9832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensrepeat(struct parse *p, 9842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno start, /* operand from here to end of strip */ 9852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int from, /* repeated from this number */ 9862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int to) /* to this number of times (maybe INFINITY) */ 9872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 9882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno finish = HERE(); 9892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define N 2 9902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define INF 3 9912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define REP(f, t) ((f)*8 + (t)) 9922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 9932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno copy; 9942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) /* head off possible runaway recursion */ 9962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 9972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 9982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(from <= to); 9992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (REP(MAP(from), MAP(to))) { 10012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(0, 0): /* must be user doing this */ 10022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens DROP(finish-start); /* drop the operand */ 10032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(0, 1): /* as x{1,1}? */ 10052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(0, N): /* as x{1,n}? */ 10062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(0, INF): /* as x{1,}? */ 10072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 10082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OCH_, start); /* offset is wrong... */ 10092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, start+1, 1, to); 10102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(OOR1, start); 10112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(start); /* ... fix it */ 10122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OOR2, 0); 10132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(THERE()); 10142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_CH, THERETHERE()); 10152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(1, 1): /* trivial case */ 10172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* done */ 10182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(1, N): /* as x?x{1,n-1} */ 10202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 10212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OCH_, start); 10222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(OOR1, start); 10232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(start); 10242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(OOR2, 0); /* offset very wrong... */ 10252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens AHEAD(THERE()); /* ...so fix it */ 10262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_CH, THERETHERE()); 10272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens copy = dupl(p, start+1, finish+1); 10282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(copy == finish+4); 10292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, copy, 1, to-1); 10302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(1, INF): /* as x+ */ 10322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens INSERT(OPLUS_, start); 10332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ASTERN(O_PLUS, start); 10342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(N, N): /* as xx{m-1,n-1} */ 10362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens copy = dupl(p, start, finish); 10372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, copy, from-1, to-1); 10382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case REP(N, INF): /* as xx{n-1,INF} */ 10402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens copy = dupl(p, start, finish); 10412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens repeat(p, copy, from-1, to); 10422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: /* "can't happen" */ 10442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ASSERT); /* just in case */ 10452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 10462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 10472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 10482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 10502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - seterr - set an error condition 10512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 10522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* useless but makes type checking happy */ 10532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensseterr(struct parse *p, int e) 10542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 10552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error == 0) /* keep earliest error condition */ 10562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->error = e; 10572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->next = nuls; /* try to bring things to a halt */ 10582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->end = nuls; 10592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); /* make the return value well-defined */ 10602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 10612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 10632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - allocset - allocate a set of characters for [] 10642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 10652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic cset * 10662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensallocset(struct parse *p) 10672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 10682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int no = p->g->ncsets++; 10692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t nc; 10702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t nbytes; 10712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cset *cs; 10722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t css = (size_t)p->g->csetsize; 10732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 10742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (no >= p->ncsalloc) { /* need another column of space */ 10762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens void *ptr; 10772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->ncsalloc += CHAR_BIT; 10792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens nc = p->ncsalloc; 10802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (nc > SIZE_MAX / sizeof(cset)) 10812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens goto nomem; 10822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(nc % CHAR_BIT == 0); 10832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens nbytes = nc / CHAR_BIT * css; 10842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); 10862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (ptr == NULL) 10872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens goto nomem; 10882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->sets = ptr; 10892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens ptr = (uch *)realloc((char *)p->g->setbits, nbytes); 10912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (ptr == NULL) 10922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens goto nomem; 10932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->setbits = ptr; 10942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < no; i++) 10962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 10972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 10982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); 10992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 11002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* XXX should not happen */ 11012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->g->sets == NULL || p->g->setbits == NULL) 11022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens goto nomem; 11032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs = &p->g->sets[no]; 11052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 11062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->mask = 1 << ((no) % CHAR_BIT); 11072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->hash = 0; 11082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->smultis = 0; 11092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->multis = NULL; 11102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(cs); 11122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensnomem: 11132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens free(p->g->sets); 11142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->sets = NULL; 11152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens free(p->g->setbits); 11162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->setbits = NULL; 11172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 11192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* caller's responsibility not to do set ops */ 11202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(NULL); 11212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 11222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 11242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - freeset - free a now-unused set 11252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 11262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 11272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensfreeset(struct parse *p, cset *cs) 11282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 11292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t i; 11302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cset *top = &p->g->sets[p->g->ncsets]; 11312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t css = (size_t)p->g->csetsize; 11322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < css; i++) 11342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens CHsub(cs, i); 11352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs == top-1) /* recover only the easy case */ 11362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->g->ncsets--; 11372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 11382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 11402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - freezeset - final processing on a set of characters 11412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 11422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * The main task here is merging identical sets. This is usually a waste 11432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * of time (although the hash code minimizes the overhead), but can win 11442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 11452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * is done using addition rather than xor -- all ASCII [aA] sets xor to 11462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * the same value! 11472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 11482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* set number */ 11492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensfreezeset(struct parse *p, cset *cs) 11502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 11512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens uch h = cs->hash; 11522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t i; 11532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cset *top = &p->g->sets[p->g->ncsets]; 11542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cset *cs2; 11552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t css = (size_t)p->g->csetsize; 11562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* look for an earlier one which is the same */ 11582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 11592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs2->hash == h && cs2 != cs) { 11602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* maybe */ 11612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < css; i++) 11622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 11632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; /* no */ 11642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (i == css) 11652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; /* yes */ 11662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 11672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs2 < top) { /* found one */ 11692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens freeset(p, cs); 11702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs = cs2; 11712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 11722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return((int)(cs - p->g->sets)); 11742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 11752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 11772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - firstch - return first character in a set (which must have at least one) 11782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 11792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* character; there is no "none" value */ 11802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensfirstch(struct parse *p, cset *cs) 11812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 11822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t i; 11832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t css = (size_t)p->g->csetsize; 11842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < css; i++) 11862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (CHIN(cs, i)) 11872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return((char)i); 11882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(never); 11892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); /* arbitrary */ 11902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 11912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 11922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 11932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - nch - number of characters in a set 11942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 11952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int 11962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensnch(struct parse *p, cset *cs) 11972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 11982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t i; 11992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t css = (size_t)p->g->csetsize; 12002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int n = 0; 12012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0; i < css; i++) 12032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (CHIN(cs, i)) 12042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens n++; 12052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(n); 12062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - mcadd - add a collating element to a cset 12102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 12122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensmcadd( struct parse *p, cset *cs, const char *cp) 12132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens size_t oldend = cs->smultis; 12152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens void *np; 12162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->smultis += strlen(cp) + 1; 12182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens np = realloc(cs->multis, cs->smultis); 12192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (np == NULL) { 12202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cs->multis) 12212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens free(cs->multis); 12222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->multis = NULL; 12232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 12242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 12252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 12262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cs->multis = np; 12272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 12292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - mcinvert - invert the list of collating elements in a cset 12332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 12342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This would have to know the set of possibilities. Implementation 12352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * is deferred. 12362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* ARGSUSED */ 12382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 12392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensmcinvert(struct parse *p, cset *cs) 12402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(cs->multis == NULL); /* xxx */ 12422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - mccase - add case counterparts of the list of collating elements in a cset 12462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 12472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This would have to know the set of possibilities. Implementation 12482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * is deferred. 12492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* ARGSUSED */ 12512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 12522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensmccase(struct parse *p, cset *cs) 12532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(cs->multis == NULL); /* xxx */ 12552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - isinsets - is this character in any sets? 12592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* predicate */ 12612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensisinsets(struct re_guts *g, int c) 12622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens uch *col; 12642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 12652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 12662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens unsigned uc = (uch)c; 12672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 12692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (col[uc] != 0) 12702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(1); 12712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); 12722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - samesets - are these two characters in exactly the same sets? 12762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic int /* predicate */ 12782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capenssamesets(struct re_guts *g, int c1, int c2) 12792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens uch *col; 12812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 12822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 12832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens unsigned uc1 = (uch)c1; 12842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens unsigned uc2 = (uch)c2; 12852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 12872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (col[uc1] != col[uc2]) 12882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); 12892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(1); 12902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 12912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 12922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 12932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - categorize - sort out character categories 12942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 12952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 12962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capenscategorize(struct parse *p, struct re_guts *g) 12972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 12982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cat_t *cats = g->categories; 12992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int c; 13002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int c2; 13012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cat_t cat; 13022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* avoid making error situations worse */ 13042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 13052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 13062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (c = CHAR_MIN; c <= CHAR_MAX; c++) 13082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cats[c] == 0 && isinsets(g, c)) { 13092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cat = g->ncategories++; 13102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cats[c] = cat; 13112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (c2 = c+1; c2 <= CHAR_MAX; c2++) 13122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (cats[c2] == 0 && samesets(g, c, c2)) 13132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cats[c2] = cat; 13142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 13152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 13162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 13182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - dupl - emit a duplicate of a bunch of sops 13192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 13202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic sopno /* start of duplicate */ 13212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensdupl(struct parse *p, 13222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno start, /* from here */ 13232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno finish) /* to this less one */ 13242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 13252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno ret = HERE(); 13262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno len = finish - start; 13272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(finish >= start); 13292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (len == 0) 13302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(ret); 13312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens enlarge(p, p->ssize + len); /* this many unexpected additions */ 13322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->ssize >= p->slen + len); 13332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (void) memmove((char *)(p->strip + p->slen), 13342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (char *)(p->strip + start), (size_t)len*sizeof(sop)); 13352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->slen += len; 13362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(ret); 13372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 13382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 13402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - doemit - emit a strip operator 13412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 13422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * It might seem better to implement this as a macro with a function as 13432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * hard-case backup, but it's just too big and messy unless there are 13442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * some changes to the data structures. Maybe later. 13452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 13462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 13472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensdoemit(struct parse *p, sop op, size_t opnd) 13482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 13492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* avoid making error situations worse */ 13502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 13512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 13522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* deal with oversize operands ("can't happen", more or less) */ 13542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(opnd < 1<<OPSHIFT); 13552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* deal with undersized strip */ 13572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->slen >= p->ssize) 13582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 13592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(p->slen < p->ssize); 13602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* finally, it's all reduced to the easy case */ 13622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->strip[p->slen++] = SOP(op, opnd); 13632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 13642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 13662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - doinsert - insert a sop into the strip 13672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 13682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 13692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensdoinsert(struct parse *p, sop op, size_t opnd, sopno pos) 13702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 13712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno sn; 13722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop s; 13732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens int i; 13742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* avoid making error situations worse */ 13762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 13772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 13782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sn = HERE(); 13802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens EMIT(op, opnd); /* do checks, ensure space */ 13812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(HERE() == sn+1); 13822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens s = p->strip[sn]; 13832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* adjust paren pointers */ 13852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(pos > 0); 13862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = 1; i < NPAREN; i++) { 13872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->pbegin[i] >= pos) { 13882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pbegin[i]++; 13892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 13902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->pend[i] >= pos) { 13912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->pend[i]++; 13922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 13932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 13942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 13952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 13962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens (HERE()-pos-1)*sizeof(sop)); 13972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->strip[pos] = s; 13982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 13992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 14012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - dofwd - complete a forward reference 14022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 14032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 14042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensdofwd(struct parse *p, sopno pos, sop value) 14052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 14062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* avoid making error situations worse */ 14072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 14082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(value < 1<<OPSHIFT); 14112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->strip[pos] = OP(p->strip[pos]) | value; 14122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 14132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 14152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - enlarge - enlarge the strip 14162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 14172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 14182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensenlarge(struct parse *p, sopno size) 14192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 14202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *sp; 14212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->ssize >= size) 14232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if ((uintptr_t)size > SIZE_MAX / sizeof(sop)) { 14262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 14272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 14292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sp = (sop *)realloc(p->strip, size*sizeof(sop)); 14312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (sp == NULL) { 14322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 14332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 14352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->strip = sp; 14362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens p->ssize = size; 14372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 14382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 14402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - stripsnug - compact the strip 14412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 14422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 14432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstripsnug(struct parse *p, struct re_guts *g) 14442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 14452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->nstates = p->slen; 14462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if ((uintptr_t)p->slen > SIZE_MAX / sizeof(sop)) { 14472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->strip = p->strip; 14482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 14492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 14512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 14532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (g->strip == NULL) { 14542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens SETERROR(REG_ESPACE); 14552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->strip = p->strip; 14562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 14572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 14582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 14602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - findmust - fill in must and mlen with longest mandatory literal string 14612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 14622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * This algorithm could do fancy things like analyzing the operands of | 14632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * for common subsequences. Someday. This code is simple and finds most 14642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * of the interesting cases. 14652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * 14662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens * Note that must and mlen got initialized during setup. 14672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 14682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic void 14692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensfindmust(struct parse *p, struct re_guts *g) 14702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 14712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *scan; 14722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *start = 0; /* start initialized in the default case, after that */ 14732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *newstart = 0; /* newstart was initialized in the OCHAR case */ 14742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno newlen; 14752df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop s; 14762df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens char *cp; 14772df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno i; 14782df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14792df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* avoid making error situations worse */ 14802df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 14812df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 14822df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 14832df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* find the longest OCHAR sequence in strip */ 14842df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens newlen = 0; 14852df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens scan = g->strip + 1; 14862df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens do { 14872df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens s = *scan++; 14882df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (OP(s)) { 14892df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OCHAR: /* sequence member */ 14902df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (newlen == 0) /* new sequence */ 14912df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens newstart = scan - 1; 14922df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens newlen++; 14932df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 14942df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OPLUS_: /* things that don't break one */ 14952df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OLPAREN: 14962df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case ORPAREN: 14972df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 14982df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OQUEST_: /* things that must be skipped */ 14992df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OCH_: 15002df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens scan--; 15012df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens do { 15022df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens scan += OPND(s); 15032df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens s = *scan; 15042df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* assert() interferes w debug printouts */ 15052df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (OP(s) != O_QUEST && OP(s) != O_CH && 15062df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens OP(s) != OOR2) { 15072df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->iflags |= REGEX_BAD; 15082df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 15092df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15102df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } while (OP(s) != O_QUEST && OP(s) != O_CH); 15112df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* fallthrough */ 15122df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens default: /* things that break a sequence */ 15132df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (newlen > g->mlen) { /* ends one */ 15142df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens start = newstart; 15152df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->mlen = newlen; 15162df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15172df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens newlen = 0; 15182df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 15192df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15202df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } while (OP(s) != OEND); 15212df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 15222df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (g->mlen == 0) /* there isn't one */ 15232df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 15242df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 15252df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens /* turn it into a character string */ 15262df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->must = malloc((size_t)g->mlen + 1); 15272df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (g->must == NULL) { /* argh; just forget it */ 15282df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->mlen = 0; 15292df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return; 15302df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15312df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens cp = g->must; 15322df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens scan = start; 15332df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens for (i = g->mlen; i > 0; i--) { 15342df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens while (OP(s = *scan++) != OCHAR) 15352df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens continue; 15362df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(cp < g->must + g->mlen); 15372df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens *cp++ = (char)OPND(s); 15382df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15392df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens assert(cp == g->must + g->mlen); 15402df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens *cp++ = '\0'; /* just on general principles */ 15412df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 15422df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 15432df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens/* 15442df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens - pluscount - count + nesting 15452df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens */ 15462df178997d17474042e8c3704cc93ab2db6619bfNicolas Capensstatic sopno /* nesting depth */ 15472df178997d17474042e8c3704cc93ab2db6619bfNicolas Capenspluscount(struct parse *p, struct re_guts *g) 15482df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens{ 15492df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop *scan; 15502df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sop s; 15512df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno plusnest = 0; 15522df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens sopno maxnest = 0; 15532df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 15542df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (p->error != 0) 15552df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(0); /* there may not be an OEND */ 15562df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens 15572df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens scan = g->strip + 1; 15582df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens do { 15592df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens s = *scan++; 15602df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens switch (OP(s)) { 15612df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case OPLUS_: 15622df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens plusnest++; 15632df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 15642df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens case O_PLUS: 15652df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (plusnest > maxnest) 15662df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens maxnest = plusnest; 15672df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens plusnest--; 15682df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens break; 15692df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } 15702df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens } while (OP(s) != OEND); 15712df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens if (plusnest != 0) 15722df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens g->iflags |= REGEX_BAD; 15732df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens return(maxnest); 15742df178997d17474042e8c3704cc93ab2db6619bfNicolas Capens} 1575