1ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/*- 2ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * This code is derived from OpenBSD's libc/regex, original license follows: 3ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 4ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * Copyright (c) 1992, 1993, 1994 Henry Spencer. 5ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * Copyright (c) 1992, 1993, 1994 6ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * The Regents of the University of California. All rights reserved. 7ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 8ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * This code is derived from software contributed to Berkeley by 9ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * Henry Spencer. 10ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 11ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * Redistribution and use in source and binary forms, with or without 12ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * modification, are permitted provided that the following conditions 13ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * are met: 14ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 1. Redistributions of source code must retain the above copyright 15ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * notice, this list of conditions and the following disclaimer. 16ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 2. Redistributions in binary form must reproduce the above copyright 17ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * notice, this list of conditions and the following disclaimer in the 18ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * documentation and/or other materials provided with the distribution. 19ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 3. Neither the name of the University nor the names of its contributors 20ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * may be used to endorse or promote products derived from this software 21ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * without specific prior written permission. 22ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 23ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * SUCH DAMAGE. 34ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 35ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * @(#)regexec.c 8.3 (Berkeley) 3/20/94 36ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin */ 37ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 38ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* 39ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * the outer shell of llvm_regexec() 40ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 41ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * This file includes engine.inc *twice*, after muchos fiddling with the 42ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * macros that code uses. This lets the same code operate on two different 43ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * representations for state sets. 44ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin */ 45ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <sys/types.h> 46ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <stdio.h> 47ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <stdlib.h> 48ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <string.h> 49ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <limits.h> 50ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include <ctype.h> 51ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "regex_impl.h" 52ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 53ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "regutils.h" 54ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "regex2.h" 55ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 56ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* macros for manipulating states, small version */ 57ad3ea3d85f164dc1b2746950e378334197d8688dNAKAMURA Takumi/* FIXME: 'states' is assumed as 'long' on small version. */ 58ad3ea3d85f164dc1b2746950e378334197d8688dNAKAMURA Takumi#define states1 long /* for later use in llvm_regexec() decision */ 59ad3ea3d85f164dc1b2746950e378334197d8688dNAKAMURA Takumi#define states states1 60ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define CLEAR(v) ((v) = 0) 61ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n))) 62ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SET1(v, n) ((v) |= (unsigned long)1 << (n)) 63ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0) 64ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ASSIGN(d, s) ((d) = (s)) 65ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define EQ(a, b) ((a) == (b)) 66ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATEVARS long dummy /* dummy version */ 67ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATESETUP(m, n) /* nothing */ 68ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATETEARDOWN(m) /* nothing */ 69ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SETUP(v) ((v) = 0) 70ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define onestate long 71ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define INIT(o, n) ((o) = (unsigned long)1 << (n)) 721144af3c9b4da48cd581156e05b24261c8de366aRichard Smith#define INC(o) ((o) = (unsigned long)(o) << 1) 73ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSTATEIN(v, o) (((v) & (o)) != 0) 74ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* some abbreviations; note that some of these know variable names! */ 75ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* do "if I'm here, I can also be there" etc without branches */ 76ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) 77ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) 78ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) 79ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* function names */ 80ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SNAMES /* engine.inc looks after details */ 81ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 82ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "regengine.inc" 83ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 84ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* now undo things */ 85ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef states 86ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef CLEAR 87ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef SET0 88ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef SET1 89ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef ISSET 90ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef ASSIGN 91ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef EQ 92ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef STATEVARS 93ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef STATESETUP 94ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef STATETEARDOWN 95ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef SETUP 96ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef onestate 97ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef INIT 98ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef INC 99ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef ISSTATEIN 100ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef FWD 101ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef BACK 102ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef ISSETBACK 103ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#undef SNAMES 104ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 105ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* macros for manipulating states, large version */ 106ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define states char * 107ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define CLEAR(v) memset(v, 0, m->g->nstates) 108ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SET0(v, n) ((v)[n] = 0) 109ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SET1(v, n) ((v)[n] = 1) 110ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSET(v, n) ((v)[n]) 111ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ASSIGN(d, s) memmove(d, s, m->g->nstates) 112ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) 113ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATEVARS long vn; char *space 114ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ 115ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin if ((m)->space == NULL) return(REG_ESPACE); \ 116ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin (m)->vn = 0; } 117ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define STATETEARDOWN(m) { free((m)->space); } 118ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) 119ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define onestate long 120ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define INIT(o, n) ((o) = (n)) 121ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define INC(o) ((o)++) 122ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSTATEIN(v, o) ((v)[o]) 123ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* some abbreviations; note that some of these know variable names! */ 124ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* do "if I'm here, I can also be there" etc without branches */ 125ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) 126ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) 127ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define ISSETBACK(v, n) ((v)[here - (n)]) 128ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* function names */ 129ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#define LNAMES /* flag */ 130ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 131ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#include "regengine.inc" 132ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 133ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin/* 134ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin - llvm_regexec - interface for matching 135ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * 136ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * We put this here so we can exploit knowledge of the state representation 137ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * when choosing which matcher to call. Also, by this point the matchers 138ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin * have been prototyped. 139ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin */ 140ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinint /* 0 success, REG_NOMATCH failure */ 141ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwinllvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, 142ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin llvm_regmatch_t pmatch[], int eflags) 143ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin{ 144ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin struct re_guts *g = preg->re_g; 145ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#ifdef REDEBUG 146ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin# define GOODFLAGS(f) (f) 147ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#else 148ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) 149ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin#endif 150ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 151ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) 152ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin return(REG_BADPAT); 153ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin assert(!(g->iflags®EX_BAD)); 154ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin if (g->iflags®EX_BAD) /* backstop for no-debug case */ 155ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin return(REG_BADPAT); 156ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin eflags = GOODFLAGS(eflags); 157ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin 158ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE)) 159d2520dcc701da54862e3e73b16d58c25474417beOwen Anderson return(smatcher(g, string, nmatch, pmatch, eflags)); 160ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin else 161d2520dcc701da54862e3e73b16d58c25474417beOwen Anderson return(lmatcher(g, string, nmatch, pmatch, eflags)); 162ce0c81e7dd321e9f94f628daa5528f56cab0ab88Torok Edwin} 163