10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors. All Rights Reserved. 20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style 30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file. 40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tested by search_test.cc, exhaustive_test.cc, tester.cc 60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Prog::BadSearchBacktrack is a backtracking regular expression search, 80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// except that it remembers where it has been, trading a lot of 90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// memory for a lot of time. It exists only for testing purposes. 100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Let me repeat that. 120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// THIS CODE SHOULD NEVER BE USED IN PRODUCTION: 140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// - It uses a ton of memory. 150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// - It uses a ton of stack. 160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// - It uses CHECK and LOG(FATAL). 170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// - It implements unanchored search by repeated anchored search. 180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// On the other hand, it is very simple and a good reference 200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// implementation for the more complicated regexp packages. 210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// In BUILD, this file is linked into the ":testing" library, 230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// not the main library, in order to make it harder to pick up 240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// accidentally. 250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/util.h" 270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h" 280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h" 290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 { 310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Backtracker holds the state for a backtracking search. 330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Excluding the search parameters, the main search state 350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// is just the "capture registers", which record, for the 360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// current execution, the string position at which each 370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// parenthesis was passed. cap_[0] and cap_[1] are the 380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc. 390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// 400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// To avoid infinite loops during backtracking on expressions 410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like (a*)*, the visited_[] bitmap marks the (state, string-position) 420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// pairs that have already been explored and are thus not worth 430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// re-exploring if we get there via another path. Modern backtracking 440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// libraries engineer their program representation differently, to make 450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// such infinite loops possible to avoid without keeping a giant visited_ 460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// bitmap, but visited_ works fine for a reference implementation 470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and it has the nice benefit of making the search run in linear time. 480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass Backtracker { 490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public: 500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin explicit Backtracker(Prog* prog); 510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin ~Backtracker(); 520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool Search(const StringPiece& text, const StringPiece& context, 540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool anchored, bool longest, 550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece* submatch, int nsubmatch); 560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private: 580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Explores from instruction ip at string position p looking for a match. 590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Returns true if found (so that caller can stop trying other possibilities). 600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool Visit(int id, const char* p); 610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Search parameters 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog* prog_; // program being run 640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece text_; // text being searched 650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece context_; // greater context of text being searched 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool anchored_; // whether search is anchored at text.begin() 670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool longest_; // whether search wants leftmost-longest match 680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool endmatch_; // whether search must end at text.end() 690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece *submatch_; // submatches to fill in 700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nsubmatch_; // # of submatches to fill in 710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Search state 730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* cap_[64]; // capture registers 740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked 750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nvisited_; // # of words in bitmap 760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}; 770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinBacktracker::Backtracker(Prog* prog) 790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : prog_(prog), 800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin anchored_(false), 810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin longest_(false), 820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin endmatch_(false), 830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin submatch_(NULL), 840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nsubmatch_(0), 850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin visited_(NULL), 860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nvisited_(0) { 870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinBacktracker::~Backtracker() { 900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete[] visited_; 910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a backtracking search. 940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Backtracker::Search(const StringPiece& text, const StringPiece& context, 950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool anchored, bool longest, 960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece* submatch, int nsubmatch) { 970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin text_ = text; 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin context_ = context; 990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (context_.begin() == NULL) 1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin context_ = text; 1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (prog_->anchor_start() && text.begin() > context_.begin()) 1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (prog_->anchor_end() && text.end() < context_.end()) 1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin anchored_ = anchored | prog_->anchor_start(); 1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin longest_ = longest | prog_->anchor_end(); 1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin endmatch_ = prog_->anchor_end(); 1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin submatch_ = submatch; 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nsubmatch_ = nsubmatch; 1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(2*nsubmatch_ < arraysize(cap_)); 1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin memset(cap_, 0, sizeof cap_); 1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // We use submatch_[0] for our own bookkeeping, 1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // so it had better exist. 1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece sp0; 1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nsubmatch < 1) { 1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin submatch_ = &sp0; 1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nsubmatch_ = 1; 1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin submatch_[0] = NULL; 1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Allocate new visited_ bitmap -- size is proportional 1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // to text, so have to reallocate on each call to Search. 1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin delete[] visited_; 1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nvisited_ = (prog_->size()*(text.size()+1) + 31)/32; 1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin visited_ = new uint32[nvisited_]; 1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin memset(visited_, 0, nvisited_*sizeof visited_[0]); 1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Anchored search must start at text.begin(). 1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (anchored_) { 1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin cap_[0] = text.begin(); 1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(prog_->start(), text.begin()); 1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Unanchored search, starting from each possible text position. 1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Notice that we have to try the empty string at the end of 1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // the text, so the loop condition is p <= text.end(), not p < text.end(). 1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (const char* p = text.begin(); p <= text.end(); p++) { 1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin cap_[0] = p; 1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (Visit(prog_->start(), p)) // Match must be leftmost; done. 1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Explores from instruction ip at string position p looking for a match. 1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Return true if found (so that caller can stop trying other possibilities). 1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Backtracker::Visit(int id, const char* p) { 1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Check bitmap. If we've already explored from here, 1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // either it didn't match or it did but we're hoping for a better match. 1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Either way, don't go down that road again. 1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK(p <= text_.end()); 1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int n = id*(text_.size()+1) + (p - text_.begin()); 1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin CHECK_LT(n/32, nvisited_); 1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (visited_[n/32] & (1 << (n&31))) 1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin visited_[n/32] |= 1 << (n&31); 1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Pick out byte at current position. If at end of string, 1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // have to explore in hope of finishing a match. Use impossible byte -1. 1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int c = -1; 1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (p < text_.end()) 1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin c = *p & 0xFF; 1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Prog::Inst* ip = prog_->inst(id); 1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin switch (ip->opcode()) { 1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin default: 1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode(); 1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; // not reached 1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstAlt: 1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstAltMatch: 1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Try both possible next states: out is preferred to out1. 1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (Visit(ip->out(), p)) { 1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (longest_) 1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Visit(ip->out1(), p); 1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(ip->out1(), p); 1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstByteRange: 1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ip->Matches(c)) 1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(ip->out(), p+1); 1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstCapture: 1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) { 1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Capture p to register, but save old value. 1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const char* q = cap_[ip->cap()]; 1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin cap_[ip->cap()] = p; 1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool ret = Visit(ip->out(), p); 1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Restore old value as we backtrack. 1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin cap_[ip->cap()] = q; 1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return ret; 1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(ip->out(), p); 1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstEmptyWidth: 1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (ip->empty() & ~Prog::EmptyFlags(context_, p)) 2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(ip->out(), p); 2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstNop: 2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return Visit(ip->out(), p); 2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstMatch: 2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // We found a match. If it's the best so far, record the 2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // parameters in the caller's submatch_ array. 2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (endmatch_ && p != context_.end()) 2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin cap_[1] = p; 2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (submatch_[0].data() == NULL || // First match so far ... 2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin (longest_ && p > submatch_[0].end())) { // ... or better match 2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin for (int i = 0; i < nsubmatch_; i++) 2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]); 2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin case kInstFail: 2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a backtracking search. 2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Prog::UnsafeSearchBacktrack(const StringPiece& text, 2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin const StringPiece& context, 2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Anchor anchor, 2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin MatchKind kind, 2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece* match, 2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin int nmatch) { 2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // If full match, we ask for an anchored longest match 2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // and then check that match[0] == text. 2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // So make sure match[0] exists. 2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin StringPiece sp0; 2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (kind == kFullMatch) { 2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin anchor = kAnchored; 2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (nmatch < 1) { 2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin match = &sp0; 2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin nmatch = 1; 2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin } 2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin // Run the search. 2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin Backtracker b(this); 2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool anchored = anchor == kAnchored; 2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool longest = kind != kFirstMatch; 2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (!b.Search(text, context, anchored, longest, match, nmatch)) 2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (kind == kFullMatch && match[0].end() != text.end()) 2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return false; 2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin return true; 2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} 2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin 2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin} // namespace re2 255