15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2006-2007 The RE2 Authors. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by search_test.cc. 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog::SearchNFA, an NFA search. 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is an actual NFA like the theorists talk about, 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// not the pseudo-NFA found in backtracking regexp implementations. 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// IMPLEMENTATION 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This algorithm is a variant of one that appeared in Rob Pike's sam editor, 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// which is a variant of the one described in Thompson's 1968 CACM paper. 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See http://swtch.com/~rsc/regexp/ for various history. The main feature 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// over the DFA implementation is that it tracks submatch boundaries. 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// When the choice of submatch boundaries is ambiguous, this particular 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementation makes the same choices that traditional backtracking 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementations (in particular, Perl and PCRE) do. 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// time in the length of the input. 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Like Thompson's original machine and like the DFA implementation, this 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementation notices a match only once it is one byte past it. 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h" 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/sparse_array.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/sparse_set.h" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 { 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class NFA { 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NFA(Prog* prog); 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~NFA(); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Searches for a matching string. 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * If anchored is true, only considers matches starting at offset. 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Otherwise finds lefmost match at or after offset. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // * If longest is true, returns the longest match starting 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // at the chosen start point. Otherwise returns the so-called 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // left-biased match, the one traditional backtracking engines 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (like Perl and PCRE) find. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Records submatch boundaries in submatch[1..nsubmatch-1]. 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Submatch[0] is the entire match. When there is a choice in 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // which text matches each subexpression, the submatch boundaries 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // are chosen to match what a backtracking implementation would choose. 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Search(const StringPiece& text, const StringPiece& context, 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch); 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int Debug = 0; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct Thread { 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) union { 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id; 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* next; // when on free list 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** capture; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // State for explicit stack in AddToThreadq. 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct AddState { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id; // Inst to process 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int j; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* cap_j; // if j>=0, set capture[j] = cap_j before processing ip 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddState() 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : id(0), j(-1), cap_j(NULL) {} 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit AddState(int id) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : id(id), j(-1), cap_j(NULL) {} 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddState(int id, const char* cap_j, int j) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : id(id), j(j), cap_j(cap_j) {} 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Threadq is a list of threads. The list is sorted by the order 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in which Perl would explore that particular state -- the earlier 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // choices appear earlier in the list. 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef SparseArray<Thread*> Threadq; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline Thread* AllocThread(); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void FreeThread(Thread*); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add id (or its children, following unlabeled arrows) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to the workqueue q with associated capture info. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void AddToThreadq(Threadq* q, int id, int flag, 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* p, const char** capture); 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Run runq on byte c, appending new states to nextq. 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Updates matched_ and match_ as new, better matches are found. 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // p is position of the next byte (the one after c) 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the input string, used when processing capturing parens. 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // flag is the bitwise or of Bol, Eol, etc., specifying whether 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ^, $ and \b match the current input point (after c). 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline int Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p); 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns text version of capture information, for debugging. 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string FormatCapture(const char** capture); 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void CopyCapture(const char** dst, const char** src); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Computes whether all matches must begin with the same first 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // byte, and if so, returns that byte. If not, returns -1. 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ComputeFirstByte(); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* prog_; // underlying program 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int start_; // start instruction in program 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ncapture_; // number of submatches to track 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest_; // whether searching for longest match 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool endmatch_; // whether match must end at text.end() 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* btext_; // beginning of text being matched (for FormatSubmatch) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* etext_; // end of text being matched (for endmatch_) 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Threadq q0_, q1_; // pre-allocated for Search. 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** match_; // best match so far 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool matched_; // any match so far? 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddState* astack_; // pre-allocated for AddToThreadq 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nastack_; 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int first_byte_; // required first byte for match, or -1 if none 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* free_threads_; // free list 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_EVIL_CONSTRUCTORS(NFA); 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NFA::NFA(Prog* prog) { 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prog_ = prog; 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start_ = prog->start(); 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncapture_ = 0; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_ = false; 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_ = false; 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) btext_ = NULL; 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) etext_ = NULL; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q0_.resize(prog_->size()); 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q1_.resize(prog_->size()); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nastack_ = 2*prog_->size(); 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) astack_ = new AddState[nastack_]; 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_ = NULL; 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = false; 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free_threads_ = NULL; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) first_byte_ = ComputeFirstByte(); 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NFA::~NFA() { 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] match_; 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] astack_; 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* next; 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Thread* t = free_threads_; t; t = next) { 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next = t->next; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] t->capture; 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete t; 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void NFA::FreeThread(Thread *t) { 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (t == NULL) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->next = free_threads_; 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free_threads_ = t; 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)NFA::Thread* NFA::AllocThread() { 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* t = free_threads_; 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (t == NULL) { 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t = new Thread; 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->capture = new const char*[ncapture_]; 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return t; 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) free_threads_ = t->next; 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return t; 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void NFA::CopyCapture(const char** dst, const char** src) { 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < ncapture_; i+=2) { 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dst[i] = src[i]; 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dst[i+1] = src[i+1]; 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Follows all empty arrows from id0 and enqueues all the states reached. 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The bits in flag (Bol, Eol, etc.) specify whether ^, $ and \b match. 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The pointer p is the current input position, and m is the 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// current set of match boundaries. 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void NFA::AddToThreadq(Threadq* q, int id0, int flag, 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* p, const char** capture) { 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (id0 == 0) 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Astack_ is pre-allocated to avoid resize operations. 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // It has room for 2*prog_->size() entries, which is enough: 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Each inst in prog can be processed at most once, 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // pushing at most two entries on stk. 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nstk = 0; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddState* stk = astack_; 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(id0); 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (nstk > 0) { 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK_LE(nstk, nastack_); 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const AddState& a = stk[--nstk]; 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (a.j >= 0) 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) capture[a.j] = a.cap_j; 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id = a.id; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (id == 0) 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (q->has_index(id)) { 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, " [%d%s]\n", id, FormatCapture(capture).c_str()); 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Create entry in q no matter what. We might fill it in below, 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // or we might not. Even if not, it is necessary to have it, 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // so that we don't revisit id0 during the recursion. 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q->set_new(id, NULL); 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread** tp = &q->find(id)->second; 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int j; 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* t; 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq"; 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstFail: 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAltMatch: 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Save state; will pick up at next byte. 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t = AllocThread(); 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->id = id; 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CopyCapture(t->capture, capture); 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *tp = t; 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // fall through 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAlt: 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Explore alternatives. 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(ip->out1()); 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(ip->out()); 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstNop: 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Continue on. 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(ip->out()); 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstCapture: 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((j=ip->cap()) < ncapture_) { 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Push a dummy whose only job is to restore capture[j] 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // once we finish exploring this possibility. 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(0, capture[j], j); 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Record capture. 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) capture[j] = p; 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(ip->out()); 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstByteRange: 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Save state; will pick up at next byte. 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t = AllocThread(); 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->id = id; 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CopyCapture(t->capture, capture); 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *tp = t; 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, " + %d%s [%p]\n", id, FormatCapture(t->capture).c_str(), t); 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstEmptyWidth: 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Continue on if we have all the right flag bits. 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->empty() & ~flag) 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) stk[nstk++] = AddState(ip->out()); 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Run runq on byte c, appending new states to nextq. 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Updates match as new, better matches are found. 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// p is position of the byte c in the input string, 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used when processing capturing parens. 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// flag is the bitwise or of Bol, Eol, etc., specifying whether 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ^, $ and \b match the current input point (after c). 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Frees all the threads on runq. 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If there is a shortcut to the end, returns that shortcut. 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int NFA::Step(Threadq* runq, Threadq* nextq, int c, int flag, const char* p) { 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nextq->clear(); 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* t = i->second; 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (t == NULL) 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (longest_) { 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Can skip any threads started after our current best match. 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matched_ && match_[0] < t->capture[0]) { 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(t); 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id = t->id; 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Should only see the values handled below. 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step"; 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstByteRange: 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->Matches(c)) 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddToThreadq(nextq, ip->out(), flag, p+1, t->capture); 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAltMatch: 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i != runq->begin()) 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The match is ours if we want it. 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->greedy(prog_) || longest_) { 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CopyCapture((const char**)match_, t->capture); 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(t); 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (++i; i != runq->end(); ++i) 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(i->second); 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) runq->clear(); 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = true; 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->greedy(prog_)) 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ip->out1(); 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ip->out(); 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (endmatch_ && p != etext_) 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* old = t->capture[1]; // previous end pointer 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->capture[1] = p; 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (longest_) { 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Leftmost-longest mode: save this match only if 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // it is either farther to the left or at the same 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // point but longer than an existing match. 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!matched_ || t->capture[0] < match_[0] || 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (t->capture[0] == match_[0] && t->capture[1] > match_[1])) 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CopyCapture((const char**)match_, t->capture); 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Leftmost-biased mode: this match is by definition 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // better than what we've already found (see next line). 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CopyCapture((const char**)match_, t->capture); 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Cut off the threads that can only find matches 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // worse than the one we just found: don't run the 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // rest of the current Threadq. 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->capture[0] = old; 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(t); 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (++i; i != runq->end(); ++i) 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(i->second); 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) runq->clear(); 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = true; 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) t->capture[0] = old; 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = true; 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(t); 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) runq->clear(); 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string NFA::FormatCapture(const char** capture) { 3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string s; 3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < ncapture_; i+=2) { 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (capture[i] == NULL) 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringAppendF(&s, "(?,?)"); 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (capture[i+1] == NULL) 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringAppendF(&s, "(%d,?)", (int)(capture[i] - btext_)); 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringAppendF(&s, "(%d,%d)", 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (int)(capture[i] - btext_), 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (int)(capture[i+1] - btext_)); 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return s; 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns whether haystack contains needle's memory. 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool StringPieceContains(const StringPiece haystack, const StringPiece needle) { 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return haystack.begin() <= needle.begin() && 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) haystack.end() >= needle.end(); 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool NFA::Search(const StringPiece& text, const StringPiece& const_context, 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch) { 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (start_ == 0) 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece context = const_context; 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (context.begin() == NULL) 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) context = text; 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!StringPieceContains(context, text)) { 4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(FATAL) << "Bad args: context does not contain text " 4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << reinterpret_cast<const void*>(context.begin()) 4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << "+" << context.size() << " " 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << reinterpret_cast<const void*>(text.begin()) 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) << "+" << text.size(); 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_start() && context.begin() != text.begin()) 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_end() && context.end() != text.end()) 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchored |= prog_->anchor_start(); 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_end()) { 4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest = true; 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_ = true; 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) etext_ = text.end(); 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nsubmatch < 0) { 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch; 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Save search parameters. 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncapture_ = 2*nsubmatch; 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_ = longest; 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nsubmatch == 0) { 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We need to maintain match[0], both to distinguish the 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // longest match (if longest is true) and also to tell 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // whether we've seen any matches at all. 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncapture_ = 2; 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_ = new const char*[ncapture_]; 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = false; 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(match_, 0, ncapture_*sizeof match_[0]); 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For debugging prints. 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) btext_ = context.begin(); 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) { 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n", 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text.as_string().c_str(), context.as_string().c_str(), anchored, 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest); 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set up search. 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Threadq* runq = &q0_; 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Threadq* nextq = &q1_; 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) runq->clear(); 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nextq->clear(); 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(&match_[0], 0, ncapture_*sizeof match_[0]); 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* bp = context.begin(); 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = -1; 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int wasword = 0; 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (text.begin() > context.begin()) { 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = text.begin()[-1] & 0xFF; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) wasword = Prog::IsWordChar(c); 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Loop over the text, stepping the machine. 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char* p = text.begin();; p++) { 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check for empty-width specials. 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int flag = 0; 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ^ and \A 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p == context.begin()) 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyBeginText | kEmptyBeginLine; 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (p <= context.end() && p[-1] == '\n') 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyBeginLine; 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // $ and \z 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p == context.end()) 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyEndText | kEmptyEndLine; 4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (p < context.end() && p[0] == '\n') 4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyEndLine; 4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // \b and \B 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int isword = 0; 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < context.end()) 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) isword = Prog::IsWordChar(p[0] & 0xFF); 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isword != wasword) 4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyWordBoundary; 4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag |= kEmptyNonWordBoundary; 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) { 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, "%c[%#x/%d/%d]:", p > text.end() ? '$' : p == bp ? '^' : c, flag, isword, wasword); 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) { 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Thread* t = i->second; 5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (t == NULL) 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, " %d%s", t->id, 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FormatCapture((const char**)t->capture).c_str()); 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, "\n"); 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Process previous character (waited until now to avoid 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // repeating the flag computation above). 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This is a no-op the first time around the loop, because 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // runq is empty. 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id = Step(runq, nextq, c, flag, p-1); 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK_EQ(runq->size(), 0); 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) swap(nextq, runq); 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nextq->clear(); 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (id != 0) { 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We're done: full match ahead. 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = text.end(); 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (;;) { 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode(); 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstCapture: 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_[ip->cap()] = p; 5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstNop: 5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: 5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_[1] = p; 5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched_ = true; 5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstEmptyWidth: 5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->empty() & ~(kEmptyEndLine|kEmptyEndText)) { 5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Unexpected empty-width in short circuit: " << ip->empty(); 5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p > text.end()) 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Start a new thread if there have not been any matches. 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (No point in starting a new thread if there have been 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // matches, since it would be to the right of the match 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we already found.) 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!matched_ && (!anchored || p == text.begin())) { 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If there's a required first byte for an unanchored search 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and we're not in the middle of any possible matches, 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // use memchr to search for the byte quickly. 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!anchored && first_byte_ >= 0 && runq->size() == 0 && 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p < text.end() && (p[0] & 0xFF) != first_byte_) { 5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = reinterpret_cast<const char*>(memchr(p, first_byte_, 5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text.end() - p)); 5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p == NULL) { 5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = text.end(); 5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) isword = 0; 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) isword = Prog::IsWordChar(p[0] & 0xFF); 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) flag = Prog::EmptyFlags(context, p); 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Steal match storage (cleared but unused as of yet) 5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // temporarily to hold match boundaries for new thread. 5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_[0] = p; 5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AddToThreadq(runq, start_, flag, p, match_); 5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match_[0] = NULL; 5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If all the threads have died, stop early. 5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (runq->size() == 0) { 5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) 5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, "dead\n"); 5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p == text.end()) 5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = 0; 5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = *p & 0xFF; 6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) wasword = isword; 6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Will run step(runq, nextq, c, ...) on next iteration. See above. 6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) 6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FreeThread(i->second); 6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matched_) { 6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < nsubmatch; i++) 6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch[i].set(match_[2*i], match_[2*i+1] - match_[2*i]); 6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Debug) 6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) fprintf(stderr, "match (%d,%d)\n", 6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static_cast<int>(match_[0] - btext_), 6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static_cast<int>(match_[1] - btext_)); 6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) VLOG(1) << "No matches found"; 6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Computes whether all successful matches have a common first byte, 6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and if so, returns that byte. If not, returns -1. 6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int NFA::ComputeFirstByte() { 6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (start_ == 0) 6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int b = -1; // first byte, not yet computed 6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef SparseSet Workq; 6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Workq q(prog_->size()); 6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q.insert(start_); 6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (Workq::iterator it = q.begin(); it != q.end(); ++it) { 6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id = *it; 6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "unhandled " << ip->opcode() << " in ComputeFirstByte"; 6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: 6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The empty string matches: no first byte. 6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstByteRange: 6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Must match only a single byte 6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->lo() != ip->hi()) 6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z') 6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we haven't seen any bytes yet, record it; 6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // otherwise must match the one we saw before. 6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b == -1) 6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b = ip->lo(); 6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else if (b != ip->lo()) 6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstNop: 6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstCapture: 6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstEmptyWidth: 6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Continue on. 6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Ignore ip->empty() flags for kInstEmptyWidth 6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in order to be as conservative as possible 6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (assume all possible empty-width flags are true). 6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->out()) 6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q.insert(ip->out()); 6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAlt: 6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAltMatch: 6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Explore alternatives. 6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->out()) 6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q.insert(ip->out()); 6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->out1()) 6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) q.insert(ip->out1()); 6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstFail: 6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return b; 6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool 6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Prog::SearchNFA(const StringPiece& text, const StringPiece& context, 6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, MatchKind kind, 6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* match, int nmatch) { 6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NFA::Debug) 6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Dump(); 6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NFA nfa(this); 6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece sp; 6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch) { 6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchor = kAnchored; 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nmatch == 0) { 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match = &sp; 6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nmatch = 1; 6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch)) 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch && match[0].end() != text.end()) 7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 710