onepass.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by search_test.cc.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog::SearchOnePass is an efficient implementation of
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression search with submatch tracking for
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// what I call "one-pass regular expressions".  (An alternate
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// name might be "backtracking-free regular expressions".)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// One-pass regular expressions have the property that
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// at each input byte during an anchored match, there may be
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// multiple alternatives but only one can proceed for any
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// given input byte.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For example, the regexp /x*yx*/ is one-pass: you read
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// x's until a y, then you read the y, then you keep reading x's.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// At no point do you have to guess what to do or back up
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and try a different guess.
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// On the other hand, /x*x/ is not one-pass: when you're
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// looking at an input "x", it's not clear whether you should
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// use it to extend the x* or as the final x.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// More examples: /([^ ]*) (.*)/ is one-pass; /(.*) (.*)/ is not.
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// /(\d+)-(\d+)/ is one-pass; /(\d+).(\d+)/ is not.
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A simple intuition for identifying one-pass regular expressions
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is that it's always immediately obvious when a repetition ends.
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// It must also be immediately obvious which branch of an | to take:
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// /x(y|z)/ is one-pass, but /(xy|xz)/ is not.
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The NFA-based search in nfa.cc does some bookkeeping to
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// avoid the need for backtracking and its associated exponential blowup.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// But if we have a one-pass regular expression, there is no
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// possibility of backtracking, so there is no need for the
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// extra bookkeeping.  Hence, this code.
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// On a one-pass regular expression, the NFA code in nfa.cc
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// runs at about 1/20 of the backtracking-based PCRE speed.
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In contrast, the code in this file runs at about the same
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// speed as PCRE.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// One-pass regular expressions get used a lot when RE is
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used for parsing simple strings, so it pays off to
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// notice them and handle them efficiently.
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See also Anne Brüggemann-Klein and Derick Wood,
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// "One-unambiguous regular languages", Information and Computation 142(2).
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string.h>
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map>
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/arena.h"
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/sparse_set.h"
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h"
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/stringpiece.h"
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int Debug = 0;
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The key insight behind this implementation is that the
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// non-determinism in an NFA for a one-pass regular expression
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is contained.  To explain what that means, first a
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// refresher about what regular expression programs look like
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and how the usual NFA execution runs.
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In a regular expression program, only the kInstByteRange
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// instruction processes an input byte c and moves on to the
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// next byte in the string (it does so if c is in the given range).
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The kInstByteRange instructions correspond to literal characters
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and character classes in the regular expression.
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The kInstAlt instructions are used as wiring to connect the
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// kInstByteRange instructions together in interesting ways when
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementing | + and *.
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The kInstAlt instruction forks execution, like a goto that
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// jumps to ip->out() and ip->out1() in parallel.  Each of the
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// resulting computation paths is called a thread.
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The other instructions -- kInstEmptyWidth, kInstMatch, kInstCapture --
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// are interesting in their own right but like kInstAlt they don't
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// advance the input pointer.  Only kInstByteRange does.
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The automaton execution in nfa.cc runs all the possible
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// threads of execution in lock-step over the input.  To process
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a particular byte, each thread gets run until it either dies
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// or finds a kInstByteRange instruction matching the byte.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If the latter happens, the thread stops just past the
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// kInstByteRange instruction (at ip->out()) and waits for
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the other threads to finish processing the input byte.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Then, once all the threads have processed that input byte,
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the whole process repeats.  The kInstAlt state instruction
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// might create new threads during input processing, but no
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// matter what, all the threads stop after a kInstByteRange
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and wait for the other threads to "catch up".
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Running in lock step like this ensures that the NFA reads
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the input string only once.
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Each thread maintains its own set of capture registers
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (the string positions at which it executed the kInstCapture
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// instructions corresponding to capturing parentheses in the
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regular expression).  Repeated copying of the capture registers
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is the main performance bottleneck in the NFA implementation.
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A regular expression program is "one-pass" if, no matter what
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the input string, there is only one thread that makes it
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// past a kInstByteRange instruction at each input byte.  This means
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// that there is in some sense only one active thread throughout
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the execution.  Other threads might be created during the
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// processing of an input byte, but they are ephemeral: only one
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// thread is left to start processing the next input byte.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is what I meant above when I said the non-determinism
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// was "contained".
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To execute a one-pass regular expression program, we can build
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a DFA (no non-determinism) that has at most as many states as
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the NFA (compare this to the possibly exponential number of states
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// in the general case).  Each state records, for each possible
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// input byte, the next state along with the conditions required
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// before entering that state -- empty-width flags that must be true
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and capture operations that must be performed.  It also records
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// whether a set of conditions required to finish a match at that
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// point in the input rather than process the next byte.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A state in the one-pass NFA (aka DFA) - just an array of actions.
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct OneState;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A state in the one-pass NFA - just an array of actions indexed
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// by the bytemap_[] of the next input byte.  (The bytemap
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// maps next input bytes into equivalence classes, to reduce
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the memory footprint.)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct OneState {
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 matchcond;   // conditions to match right now.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 action[1];
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The uint32 conditions in the action are a combination of
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// condition and capture bits and the next state.  The bottom 16 bits
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// are the condition and capture bits, and the top 16 are the index of
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the next state.
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Bits 0-5 are the empty-width flags from prog.h.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Bit 6 is kMatchWins, which means the match takes
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// priority over moving to next in a first-match search.
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The remaining bits mark capture registers that should
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// be set to the current input position.  The capture bits
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// start at index 2, since the search loop can take care of
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// cap[0], cap[1] (the overall match position).
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// That means we can handle up to 5 capturing parens: $1 through $4, plus $0.
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// No input position can satisfy both kEmptyWordBoundary
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and kEmptyNonWordBoundary, so we can use that as a sentinel
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// instead of needing an extra bit.
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kIndexShift    = 16;  // number of bits below index
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kEmptyShift   = 6;  // number of empty flags in prog.h
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kRealCapShift = kEmptyShift + 1;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kRealMaxCap   = (kIndexShift - kRealCapShift) / 2 * 2;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parameters used to skip over cap[0], cap[1].
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kCapShift     = kRealCapShift - 2;
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const int    kMaxCap       = kRealMaxCap + 2;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const uint32 kMatchWins    = 1 << kEmptyShift;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const uint32 kCapMask      = ((1 << kRealMaxCap) - 1) << kRealCapShift;
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static const uint32 kImpossible   = kEmptyWordBoundary | kEmptyNonWordBoundary;
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Check, at compile time, that prog.h agrees with math above.
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This function is never called.
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void OnePass_Checks() {
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  COMPILE_ASSERT((1<<kEmptyShift)-1 == kEmptyAllFlags,
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 kEmptyShift_disagrees_with_kEmptyAllFlags);
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // kMaxCap counts pointers, kMaxOnePassCapture counts pairs.
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  COMPILE_ASSERT(kMaxCap == Prog::kMaxOnePassCapture*2,
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                 kMaxCap_disagrees_with_kMaxOnePassCapture);
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool Satisfy(uint32 cond, const StringPiece& context, const char* p) {
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 satisfied = Prog::EmptyFlags(context, p);
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (cond & kEmptyAllFlags & ~satisfied)
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Apply the capture bits in cond, saving p to the appropriate
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// locations in cap[].
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static void ApplyCaptures(uint32 cond, const char* p,
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          const char** cap, int ncap) {
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 2; i < ncap; i++)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (cond & (1 << kCapShift << i))
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      cap[i] = p;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Compute a node pointer.
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Basically (OneState*)(nodes + statesize*nodeindex)
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// but the version with the C++ casts overflows 80 characters (and is ugly).
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    int nodeindex) {
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return reinterpret_cast<OneState*>(
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const_cast<uint8*>(nodes + statesize*nodeindex));
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Prog::SearchOnePass(const StringPiece& text,
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         const StringPiece& const_context,
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         Anchor anchor, MatchKind kind,
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         StringPiece* match, int nmatch) {
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (anchor != kAnchored && kind != kFullMatch) {
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG(DFATAL) << "Cannot use SearchOnePass for unanchored matches.";
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Make sure we have at least cap[1],
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // because we use it to tell if we matched.
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int ncap = 2*nmatch;
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (ncap < 2)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ncap = 2;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* cap[kMaxCap];
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < ncap; i++)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cap[i] = NULL;
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* matchcap[kMaxCap];
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < ncap; i++)
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    matchcap[i] = NULL;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece context = const_context;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (context.begin() == NULL)
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    context = text;
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (anchor_start() && context.begin() != text.begin())
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (anchor_end() && context.end() != text.end())
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (anchor_end())
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kind = kFullMatch;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // State and act are marked volatile to
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // keep the compiler from re-ordering the
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // memory accesses walking over the NFA.
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This is worth about 5%.
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  volatile OneState* state = onepass_start_;
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  volatile uint8* nodes = onepass_nodes_;
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  volatile uint32 statesize = onepass_statesize_;
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint8* bytemap = bytemap_;
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* bp = text.begin();
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* ep = text.end();
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* p;
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool matched = false;
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  matchcap[0] = bp;
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  cap[0] = bp;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 nextmatchcond = state->matchcond;
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (p = bp; p < ep; p++) {
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int c = bytemap[*p & 0xFF];
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 matchcond = nextmatchcond;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 cond = state->action[c];
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Determine whether we can reach act->next.
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If so, advance state and nextmatchcond.
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((cond & kEmptyAllFlags) == 0 || Satisfy(cond, context, p)) {
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      uint32 nextindex = cond >> kIndexShift;
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      state = IndexToNode(nodes, statesize, nextindex);
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nextmatchcond = state->matchcond;
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      state = NULL;
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nextmatchcond = kImpossible;
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // This code section is carefully tuned.
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // The goto sequence is about 10% faster than the
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // obvious rewrite as a large if statement in the
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // ASCIIMatchRE2 and DotMatchRE2 benchmarks.
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Saving the match capture registers is expensive.
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Is this intermediate match worth thinking about?
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Not if we want a full match.
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (kind == kFullMatch)
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      goto skipmatch;
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Not if it's impossible.
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (matchcond == kImpossible)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      goto skipmatch;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Not if the possible match is beaten by the certain
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // match at the next byte.  When this test is useless
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // (e.g., HTTPPartialMatchRE2) it slows the loop by
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // about 10%, but when it avoids work (e.g., DotMatchRE2),
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // it cuts the loop execution by about 45%.
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((cond & kMatchWins) == 0 && (nextmatchcond & kEmptyAllFlags) == 0)
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      goto skipmatch;
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Finally, the match conditions must be satisfied.
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p)) {
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (int i = 2; i < 2*nmatch; i++)
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        matchcap[i] = cap[i];
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (nmatch > 1 && (matchcond & kCapMask))
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ApplyCaptures(matchcond, p, matchcap, ncap);
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matchcap[1] = p;
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = true;
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If we're in longest match mode, we have to keep
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // going and see if we find a longer match.
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // In first match mode, we can stop if the match
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // takes priority over the next state for this input byte.
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // That bit is per-input byte and thus in cond, not matchcond.
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (kind == kFirstMatch && (cond & kMatchWins))
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        goto done;
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  skipmatch:
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (state == NULL)
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      goto done;
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((cond & kCapMask) && nmatch > 1)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ApplyCaptures(cond, p, cap, ncap);
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Look for match at end of input.
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  {
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 matchcond = state->matchcond;
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (matchcond != kImpossible &&
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((matchcond & kEmptyAllFlags) == 0 || Satisfy(matchcond, context, p))) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (nmatch > 1 && (matchcond & kCapMask))
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ApplyCaptures(matchcond, p, cap, ncap);
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (int i = 2; i < ncap; i++)
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        matchcap[i] = cap[i];
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matchcap[1] = p;
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matched = true;
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)done:
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!matched)
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < nmatch; i++)
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    match[i].set(matchcap[2*i], matchcap[2*i+1] - matchcap[2*i]);
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Analysis to determine whether a given regexp program is one-pass.
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If ip is not on workq, adds ip to work queue and returns true.
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If ip is already on work queue, does nothing and returns false.
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If ip is NULL, does nothing and returns true (pretends to add it).
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef SparseSet Instq;
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool AddQ(Instq *q, int id) {
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (id == 0)
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (q->contains(id))
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  q->insert(id);
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct InstCond {
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int id;
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 cond;
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns whether this is a one-pass program; that is,
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// returns whether it is safe to use SearchOnePass on this program.
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These conditions must be true for any instruction ip:
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   (1) for any other Inst nip, there is at most one input-free
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       path from ip to nip.
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   (2) there is at most one kInstByte instruction reachable from
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       ip that matches any particular byte c.
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   (3) there is at most one input-free path from ip to a kInstMatch
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       instruction.
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is actually just a conservative approximation: it might
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// return false when the answer is true, when kInstEmptyWidth
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// instructions are involved.
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Constructs and saves corresponding one-pass NFA on success.
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Prog::IsOnePass() {
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (did_onepass_)
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return onepass_start_ != NULL;
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  did_onepass_ = true;
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (start() == 0)  // no match
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Steal memory for the one-pass NFA from the overall DFA budget.
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Willing to use at most 1/4 of the DFA budget (heuristic).
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Limit max node count to 65000 as a conservative estimate to
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // avoid overflowing 16-bit node index in encoding.
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int maxnodes = 2 + byte_inst_count_;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int statesize = sizeof(OneState) + (bytemap_range_-1)*sizeof(uint32);
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Flood the graph starting at the start state, and check
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that in each reachable state, each possible byte leads
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to a unique next state.
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int size = this->size();
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  InstCond *stack = new InstCond[size];
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int* nodebyid = new int[size];  // indexed by ip
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memset(nodebyid, 0xFF, size*sizeof nodebyid[0]);
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint8* nodes = new uint8[maxnodes*statesize];
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint8* nodep = nodes;
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instq tovisit(size), workq(size);
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AddQ(&tovisit, start());
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nodebyid[start()] = 0;
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nodep += statesize;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nalloc = 1;
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int id = *it;
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int nodeindex = nodebyid[id];
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OneState* node = IndexToNode(nodes, statesize, nodeindex);
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Flood graph using manual stack, filling in actions as found.
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Default is none.
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int b = 0; b < bytemap_range_; b++)
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      node->action[b] = kImpossible;
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    node->matchcond = kImpossible;
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    workq.clear();
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool matched = false;
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int nstack = 0;
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stack[nstack].id = id;
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    stack[nstack++].cond = 0;
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (nstack > 0) {
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int id = stack[--nstack].id;
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Prog::Inst* ip = inst(id);
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      uint32 cond = stack[nstack].cond;
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      switch (ip->opcode()) {
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstAltMatch:
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // TODO(rsc): Ignoring kInstAltMatch optimization.
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Should implement it in this engine, but it's subtle.
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Fall through.
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstAlt:
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // If already on work queue, (1) is violated: bail out.
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (!AddQ(&workq, ip->out()) || !AddQ(&workq, ip->out1()))
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            goto fail;
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack].id = ip->out1();
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack++].cond = cond;
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack].id = ip->out();
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack++].cond = cond;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstByteRange: {
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          int nextindex = nodebyid[ip->out()];
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (nextindex == -1) {
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (nalloc >= maxnodes) {
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if (Debug)
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                LOG(ERROR)
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << StringPrintf("Not OnePass: hit node limit %d > %d",
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  nalloc, maxnodes);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              goto fail;
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            nextindex = nalloc;
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            nodep += statesize;
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            nodebyid[ip->out()] = nextindex;
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            nalloc++;
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            AddQ(&tovisit, ip->out());
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (matched)
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            cond |= kMatchWins;
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          for (int c = ip->lo(); c <= ip->hi(); c++) {
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int b = bytemap_[c];
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            c = unbytemap_[b];  // last c in byte class
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            uint32 act = node->action[b];
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            uint32 newact = (nextindex << kIndexShift) | cond;
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if ((act & kImpossible) == kImpossible) {
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              node->action[b] = newact;
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else if (act != newact) {
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if (Debug) {
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                LOG(ERROR)
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  << StringPrintf("Not OnePass: conflict on byte "
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  "%#x at state %d",
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  c, *it);
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              }
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              goto fail;
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (ip->foldcase()) {
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            Rune lo = max<Rune>(ip->lo(), 'a') + 'A' - 'a';
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            Rune hi = min<Rune>(ip->hi(), 'z') + 'A' - 'a';
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            for (int c = lo; c <= hi; c++) {
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              int b = bytemap_[c];
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              c = unbytemap_[b];  // last c in class
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              uint32 act = node->action[b];
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              uint32 newact = (nextindex << kIndexShift) | cond;
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if ((act & kImpossible) == kImpossible) {
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                node->action[b] = newact;
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              } else if (act != newact) {
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                if (Debug) {
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  LOG(ERROR)
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    << StringPrintf("Not OnePass: conflict on byte "
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    "%#x at state %d",
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    c, *it);
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                }
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                goto fail;
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              }
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstCapture:
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (ip->cap() < kMaxCap)
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            cond |= (1 << kCapShift) << ip->cap();
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          goto QueueEmpty;
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstEmptyWidth:
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          cond |= ip->empty();
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          goto QueueEmpty;
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstNop:
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        QueueEmpty:
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // kInstCapture and kInstNop always proceed to ip->out().
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // kInstEmptyWidth only sometimes proceeds to ip->out(),
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // but as a conservative approximation we assume it always does.
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // We could be a little more precise by looking at what c
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // is, but that seems like overkill.
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // If already on work queue, (1) is violated: bail out.
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (!AddQ(&workq, ip->out())) {
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (Debug) {
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              LOG(ERROR) << StringPrintf("Not OnePass: multiple paths"
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         " %d -> %d\n",
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         *it, ip->out());
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            goto fail;
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack].id = ip->out();
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack[nstack++].cond = cond;
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstMatch:
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (matched) {
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // (3) is violated
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (Debug) {
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              LOG(ERROR) << StringPrintf("Not OnePass: multiple matches"
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         " from %d\n", *it);
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            goto fail;
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          matched = true;
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          node->matchcond = cond;
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        case kInstFail:
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          break;
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (Debug) {  // For debugging, dump one-pass NFA to LOG(ERROR).
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    string dump = "prog dump:\n" + Dump() + "node dump\n";
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    map<int, int> idmap;
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int i = 0; i < size; i++)
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (nodebyid[i] != -1)
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        idmap[nodebyid[i]] = i;
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    StringAppendF(&dump, "byte ranges:\n");
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int i = 0;
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int b = 0; b < bytemap_range_; b++) {
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int lo = i;
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (bytemap_[i] == b)
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        i++;
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      StringAppendF(&dump, "\t%d: %#x-%#x\n", b, lo, i - 1);
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Instq::iterator it = tovisit.begin(); it != tovisit.end(); ++it) {
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int id = *it;
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      int nodeindex = nodebyid[id];
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (nodeindex == -1)
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      	continue;
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      OneState* node = IndexToNode(nodes, statesize, nodeindex);
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      string s;
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      StringAppendF(&dump, "node %d id=%d: matchcond=%#x\n",
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    nodeindex, id, node->matchcond);
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (int i = 0; i < bytemap_range_; i++) {
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if ((node->action[i] & kImpossible) == kImpossible)
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          continue;
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        StringAppendF(&dump, "  %d cond %#x -> %d id=%d\n",
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      i, node->action[i] & 0xFFFF,
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      node->action[i] >> kIndexShift,
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      idmap[node->action[i] >> kIndexShift]);
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG(ERROR) << dump;
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Overallocated earlier; cut down to actual size.
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nodep = new uint8[nalloc*statesize];
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memmove(nodep, nodes, nalloc*statesize);
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] nodes;
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nodes = nodep;
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  onepass_start_ = IndexToNode(nodes, statesize, nodebyid[start()]);
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  onepass_nodes_ = nodes;
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  onepass_statesize_ = statesize;
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  dfa_mem_ -= nalloc*statesize;
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] stack;
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] nodebyid;
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)fail:
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] stack;
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] nodebyid;
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] nodes;
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
615