15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by search_test.cc, exhaustive_test.cc, tester.cc
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog::BadSearchBacktrack is a backtracking regular expression search,
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// except that it remembers where it has been, trading a lot of
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// memory for a lot of time. It exists only for testing purposes.
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Let me repeat that.
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   - It uses a ton of memory.
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   - It uses a ton of stack.
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   - It uses CHECK and LOG(FATAL).
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   - It implements unanchored search by repeated anchored search.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// On the other hand, it is very simple and a good reference
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// implementation for the more complicated regexp packages.
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In BUILD, this file is linked into the ":testing" library,
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// not the main library, in order to make it harder to pick up
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// accidentally.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h"
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h"
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Backtracker holds the state for a backtracking search.
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Excluding the search parameters, the main search state
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is just the "capture registers", which record, for the
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// current execution, the string position at which each
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// parenthesis was passed.  cap_[0] and cap_[1] are the
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// To avoid infinite loops during backtracking on expressions
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// like (a*)*, the visited_[] bitmap marks the (state, string-position)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// pairs that have already been explored and are thus not worth
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// re-exploring if we get there via another path.  Modern backtracking
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// libraries engineer their program representation differently, to make
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// such infinite loops possible to avoid without keeping a giant visited_
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// bitmap, but visited_ works fine for a reference implementation
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and it has the nice benefit of making the search run in linear time.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Backtracker {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit Backtracker(Prog* prog);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~Backtracker();
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Search(const StringPiece& text, const StringPiece& context,
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              bool anchored, bool longest,
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              StringPiece* submatch, int nsubmatch);
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Explores from instruction ip at string position p looking for a match.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if found (so that caller can stop trying other possibilities).
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Visit(int id, const char* p);
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Search parameters
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog* prog_;              // program being run
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece text_;        // text being searched
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece context_;     // greater context of text being searched
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool anchored_;           // whether search is anchored at text.begin()
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool longest_;            // whether search wants leftmost-longest match
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool endmatch_;           // whether search must end at text.end()
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece *submatch_;   // submatches to fill in
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nsubmatch_;           //   # of submatches to fill in
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Search state
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* cap_[64];     // capture registers
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 *visited_;         // bitmap: (Inst*, char*) pairs already backtracked
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int nvisited_;            //   # of words in bitmap
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Backtracker::Backtracker(Prog* prog)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  : prog_(prog),
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    anchored_(false),
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    longest_(false),
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    endmatch_(false),
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    submatch_(NULL),
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nsubmatch_(0),
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    visited_(NULL),
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nvisited_(0) {
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Backtracker::~Backtracker() {
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] visited_;
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Runs a backtracking search.
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Backtracker::Search(const StringPiece& text, const StringPiece& context,
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         bool anchored, bool longest,
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         StringPiece* submatch, int nsubmatch) {
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  text_ = text;
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  context_ = context;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (context_.begin() == NULL)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    context_ = text;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_->anchor_start() && text.begin() > context_.begin())
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (prog_->anchor_end() && text.end() < context_.end())
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  anchored_ = anchored | prog_->anchor_start();
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  longest_ = longest | prog_->anchor_end();
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  endmatch_ = prog_->anchor_end();
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  submatch_ = submatch;
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nsubmatch_ = nsubmatch;
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(2*nsubmatch_ < arraysize(cap_));
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memset(cap_, 0, sizeof cap_);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We use submatch_[0] for our own bookkeeping,
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so it had better exist.
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece sp0;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (nsubmatch < 1) {
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    submatch_ = &sp0;
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    nsubmatch_ = 1;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  submatch_[0] = NULL;
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Allocate new visited_ bitmap -- size is proportional
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to text, so have to reallocate on each call to Search.
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete[] visited_;
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  visited_ = new uint32[nvisited_];
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  memset(visited_, 0, nvisited_*sizeof visited_[0]);
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Anchored search must start at text.begin().
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (anchored_) {
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cap_[0] = text.begin();
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return Visit(prog_->start(), text.begin());
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unanchored search, starting from each possible text position.
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Notice that we have to try the empty string at the end of
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the text, so the loop condition is p <= text.end(), not p < text.end().
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const char* p = text.begin(); p <= text.end(); p++) {
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    cap_[0] = p;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (Visit(prog_->start(), p))  // Match must be leftmost; done.
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Explores from instruction ip at string position p looking for a match.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Return true if found (so that caller can stop trying other possibilities).
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Backtracker::Visit(int id, const char* p) {
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check bitmap.  If we've already explored from here,
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // either it didn't match or it did but we're hoping for a better match.
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Either way, don't go down that road again.
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(p <= text_.end());
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int n = id*(text_.size()+1) + (p - text_.begin());
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK_LT(n/32, nvisited_);
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (visited_[n/32] & (1 << (n&31)))
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  visited_[n/32] |= 1 << (n&31);
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Pick out byte at current position.  If at end of string,
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // have to explore in hope of finishing a match.  Use impossible byte -1.
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int c = -1;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (p < text_.end())
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    c = *p & 0xFF;
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prog::Inst* ip = prog_->inst(id);
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (ip->opcode()) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;  // not reached
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstAlt:
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstAltMatch:
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Try both possible next states: out is preferred to out1.
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (Visit(ip->out(), p)) {
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (longest_)
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          Visit(ip->out1(), p);
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return true;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return Visit(ip->out1(), p);
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstByteRange:
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (ip->Matches(c))
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return Visit(ip->out(), p+1);
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstCapture:
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Capture p to register, but save old value.
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const char* q = cap_[ip->cap()];
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        cap_[ip->cap()] = p;
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        bool ret = Visit(ip->out(), p);
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Restore old value as we backtrack.
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        cap_[ip->cap()] = q;
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return ret;
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return Visit(ip->out(), p);
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstEmptyWidth:
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (ip->empty() & ~Prog::EmptyFlags(context_, p))
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return Visit(ip->out(), p);
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstNop:
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return Visit(ip->out(), p);
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstMatch:
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We found a match.  If it's the best so far, record the
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // parameters in the caller's submatch_ array.
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (endmatch_ && p != context_.end())
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      cap_[1] = p;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (submatch_[0].data() == NULL ||           // First match so far ...
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          (longest_ && p > submatch_[0].end())) {  // ... or better match
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (int i = 0; i < nsubmatch_; i++)
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case kInstFail:
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Runs a backtracking search.
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Prog::UnsafeSearchBacktrack(const StringPiece& text,
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 const StringPiece& context,
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 Anchor anchor,
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 MatchKind kind,
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 StringPiece* match,
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 int nmatch) {
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If full match, we ask for an anchored longest match
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and then check that match[0] == text.
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // So make sure match[0] exists.
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringPiece sp0;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (kind == kFullMatch) {
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    anchor = kAnchored;
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (nmatch < 1) {
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      match = &sp0;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      nmatch = 1;
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Run the search.
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Backtracker b(this);
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool anchored = anchor == kAnchored;
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool longest = kind != kFirstMatch;
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!b.Search(text, context, anchored, longest, match, nmatch))
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (kind == kFullMatch && match[0].end() != text.end())
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
255