15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 The RE2 Authors. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by search_test.cc, exhaustive_test.cc, tester.cc 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog::BadSearchBacktrack is a backtracking regular expression search, 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// except that it remembers where it has been, trading a lot of 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// memory for a lot of time. It exists only for testing purposes. 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Let me repeat that. 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THIS CODE SHOULD NEVER BE USED IN PRODUCTION: 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - It uses a ton of memory. 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - It uses a ton of stack. 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - It uses CHECK and LOG(FATAL). 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - It implements unanchored search by repeated anchored search. 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// On the other hand, it is very simple and a good reference 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementation for the more complicated regexp packages. 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In BUILD, this file is linked into the ":testing" library, 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// not the main library, in order to make it harder to pick up 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// accidentally. 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h" 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h" 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h" 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Backtracker holds the state for a backtracking search. 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Excluding the search parameters, the main search state 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is just the "capture registers", which record, for the 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// current execution, the string position at which each 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// parenthesis was passed. cap_[0] and cap_[1] are the 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc. 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To avoid infinite loops during backtracking on expressions 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// like (a*)*, the visited_[] bitmap marks the (state, string-position) 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// pairs that have already been explored and are thus not worth 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// re-exploring if we get there via another path. Modern backtracking 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// libraries engineer their program representation differently, to make 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// such infinite loops possible to avoid without keeping a giant visited_ 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// bitmap, but visited_ works fine for a reference implementation 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and it has the nice benefit of making the search run in linear time. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Backtracker { 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit Backtracker(Prog* prog); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~Backtracker(); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Search(const StringPiece& text, const StringPiece& context, 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch); 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Explores from instruction ip at string position p looking for a match. 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if found (so that caller can stop trying other possibilities). 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Visit(int id, const char* p); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Search parameters 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* prog_; // program being run 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece text_; // text being searched 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece context_; // greater context of text being searched 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored_; // whether search is anchored at text.begin() 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest_; // whether search wants leftmost-longest match 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool endmatch_; // whether search must end at text.end() 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece *submatch_; // submatches to fill in 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nsubmatch_; // # of submatches to fill in 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Search state 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* cap_[64]; // capture registers 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvisited_; // # of words in bitmap 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Backtracker::Backtracker(Prog* prog) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : prog_(prog), 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchored_(false), 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_(false), 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_(false), 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_(NULL), 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nsubmatch_(0), 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_(NULL), 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvisited_(0) { 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Backtracker::~Backtracker() { 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] visited_; 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Runs a backtracking search. 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Backtracker::Search(const StringPiece& text, const StringPiece& context, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch) { 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_ = text; 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) context_ = context; 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (context_.begin() == NULL) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) context_ = text; 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_start() && text.begin() > context_.begin()) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_end() && text.end() < context_.end()) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchored_ = anchored | prog_->anchor_start(); 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_ = longest | prog_->anchor_end(); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_ = prog_->anchor_end(); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_ = submatch; 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nsubmatch_ = nsubmatch; 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CHECK(2*nsubmatch_ < arraysize(cap_)); 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(cap_, 0, sizeof cap_); 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We use submatch_[0] for our own bookkeeping, 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // so it had better exist. 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece sp0; 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nsubmatch < 1) { 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_ = &sp0; 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nsubmatch_ = 1; 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_[0] = NULL; 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Allocate new visited_ bitmap -- size is proportional 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to text, so have to reallocate on each call to Search. 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] visited_; 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvisited_ = (prog_->size()*(text.size()+1) + 31)/32; 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_ = new uint32[nvisited_]; 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(visited_, 0, nvisited_*sizeof visited_[0]); 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Anchored search must start at text.begin(). 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (anchored_) { 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[0] = text.begin(); 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(prog_->start(), text.begin()); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unanchored search, starting from each possible text position. 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Notice that we have to try the empty string at the end of 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the text, so the loop condition is p <= text.end(), not p < text.end(). 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char* p = text.begin(); p <= text.end(); p++) { 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[0] = p; 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Visit(prog_->start(), p)) // Match must be leftmost; done. 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Explores from instruction ip at string position p looking for a match. 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Return true if found (so that caller can stop trying other possibilities). 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Backtracker::Visit(int id, const char* p) { 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check bitmap. If we've already explored from here, 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // either it didn't match or it did but we're hoping for a better match. 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Either way, don't go down that road again. 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CHECK(p <= text_.end()); 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int n = id*(text_.size()+1) + (p - text_.begin()); 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CHECK_LT(n/32, nvisited_); 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (visited_[n/32] & (1 << (n&31))) 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_[n/32] |= 1 << (n&31); 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Pick out byte at current position. If at end of string, 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // have to explore in hope of finishing a match. Use impossible byte -1. 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = -1; 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < text_.end()) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = *p & 0xFF; 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode(); 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; // not reached 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAlt: 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAltMatch: 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Try both possible next states: out is preferred to out1. 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Visit(ip->out(), p)) { 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (longest_) 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Visit(ip->out1(), p); 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(ip->out1(), p); 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstByteRange: 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->Matches(c)) 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(ip->out(), p+1); 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstCapture: 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) { 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Capture p to register, but save old value. 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* q = cap_[ip->cap()]; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[ip->cap()] = p; 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool ret = Visit(ip->out(), p); 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Restore old value as we backtrack. 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[ip->cap()] = q; 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ret; 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(ip->out(), p); 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstEmptyWidth: 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->empty() & ~Prog::EmptyFlags(context_, p)) 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(ip->out(), p); 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstNop: 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return Visit(ip->out(), p); 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We found a match. If it's the best so far, record the 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // parameters in the caller's submatch_ array. 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (endmatch_ && p != context_.end()) 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[1] = p; 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (submatch_[0].data() == NULL || // First match so far ... 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (longest_ && p > submatch_[0].end())) { // ... or better match 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < nsubmatch_; i++) 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]); 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstFail: 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Runs a backtracking search. 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Prog::UnsafeSearchBacktrack(const StringPiece& text, 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& context, 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MatchKind kind, 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* match, 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nmatch) { 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If full match, we ask for an anchored longest match 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and then check that match[0] == text. 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // So make sure match[0] exists. 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece sp0; 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch) { 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchor = kAnchored; 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nmatch < 1) { 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match = &sp0; 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nmatch = 1; 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Run the search. 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Backtracker b(this); 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored = anchor == kAnchored; 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest = kind != kFirstMatch; 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!b.Search(text, context, anchored, longest, match, nmatch)) 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch && match[0].end() != text.end()) 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 255