10d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Copyright 2008 The RE2 Authors.  All Rights Reserved.
20d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Use of this source code is governed by a BSD-style
30d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// license that can be found in the LICENSE file.
40d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
50d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Tested by search_test.cc, exhaustive_test.cc, tester.cc
60d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
70d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Prog::BadSearchBacktrack is a backtracking regular expression search,
80d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// except that it remembers where it has been, trading a lot of
90d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// memory for a lot of time. It exists only for testing purposes.
100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Let me repeat that.
120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// THIS CODE SHOULD NEVER BE USED IN PRODUCTION:
140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   - It uses a ton of memory.
150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   - It uses a ton of stack.
160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   - It uses CHECK and LOG(FATAL).
170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//   - It implements unanchored search by repeated anchored search.
180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// On the other hand, it is very simple and a good reference
200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// implementation for the more complicated regexp packages.
210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// In BUILD, this file is linked into the ":testing" library,
230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// not the main library, in order to make it harder to pick up
240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// accidentally.
250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "util/util.h"
270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/prog.h"
280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin#include "re2/regexp.h"
290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinnamespace re2 {
310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Backtracker holds the state for a backtracking search.
330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Excluding the search parameters, the main search state
350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// is just the "capture registers", which record, for the
360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// current execution, the string position at which each
370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// parenthesis was passed.  cap_[0] and cap_[1] are the
380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// left and right parenthesis in $0, cap_[2] and cap_[3] in $1, etc.
390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin//
400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// To avoid infinite loops during backtracking on expressions
410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// like (a*)*, the visited_[] bitmap marks the (state, string-position)
420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// pairs that have already been explored and are thus not worth
430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// re-exploring if we get there via another path.  Modern backtracking
440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// libraries engineer their program representation differently, to make
450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// such infinite loops possible to avoid without keeping a giant visited_
460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// bitmap, but visited_ works fine for a reference implementation
470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// and it has the nice benefit of making the search run in linear time.
480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinclass Backtracker {
490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin public:
500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  explicit Backtracker(Prog* prog);
510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  ~Backtracker();
520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool Search(const StringPiece& text, const StringPiece& context,
540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin              bool anchored, bool longest,
550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin              StringPiece* submatch, int nsubmatch);
560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin private:
580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Explores from instruction ip at string position p looking for a match.
590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Returns true if found (so that caller can stop trying other possibilities).
600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool Visit(int id, const char* p);
610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Search parameters
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Prog* prog_;              // program being run
640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece text_;        // text being searched
650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece context_;     // greater context of text being searched
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool anchored_;           // whether search is anchored at text.begin()
670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool longest_;            // whether search wants leftmost-longest match
680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool endmatch_;           // whether search must end at text.end()
690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece *submatch_;   // submatches to fill in
700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int nsubmatch_;           //   # of submatches to fill in
710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Search state
730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  const char* cap_[64];     // capture registers
740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  uint32 *visited_;         // bitmap: (Inst*, char*) pairs already backtracked
750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int nvisited_;            //   # of words in bitmap
760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin};
770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinBacktracker::Backtracker(Prog* prog)
790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  : prog_(prog),
800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    anchored_(false),
810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    longest_(false),
820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    endmatch_(false),
830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    submatch_(NULL),
840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    nsubmatch_(0),
850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    visited_(NULL),
860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    nvisited_(0) {
870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander GutkinBacktracker::~Backtracker() {
900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  delete[] visited_;
910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a backtracking search.
940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Backtracker::Search(const StringPiece& text, const StringPiece& context,
950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                         bool anchored, bool longest,
960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                         StringPiece* submatch, int nsubmatch) {
970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  text_ = text;
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  context_ = context;
990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (context_.begin() == NULL)
1000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    context_ = text;
1010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (prog_->anchor_start() && text.begin() > context_.begin())
1020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return false;
1030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (prog_->anchor_end() && text.end() < context_.end())
1040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return false;
1050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  anchored_ = anchored | prog_->anchor_start();
1060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  longest_ = longest | prog_->anchor_end();
1070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  endmatch_ = prog_->anchor_end();
1080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  submatch_ = submatch;
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  nsubmatch_ = nsubmatch;
1100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK(2*nsubmatch_ < arraysize(cap_));
1110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  memset(cap_, 0, sizeof cap_);
1120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // We use submatch_[0] for our own bookkeeping,
1140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // so it had better exist.
1150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece sp0;
1160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (nsubmatch < 1) {
1170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    submatch_ = &sp0;
1180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    nsubmatch_ = 1;
1190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  submatch_[0] = NULL;
1210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Allocate new visited_ bitmap -- size is proportional
1230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // to text, so have to reallocate on each call to Search.
1240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  delete[] visited_;
1250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  nvisited_ = (prog_->size()*(text.size()+1) + 31)/32;
1260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  visited_ = new uint32[nvisited_];
1270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  memset(visited_, 0, nvisited_*sizeof visited_[0]);
1280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Anchored search must start at text.begin().
1300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (anchored_) {
1310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    cap_[0] = text.begin();
1320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return Visit(prog_->start(), text.begin());
1330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Unanchored search, starting from each possible text position.
1360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Notice that we have to try the empty string at the end of
1370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // the text, so the loop condition is p <= text.end(), not p < text.end().
1380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  for (const char* p = text.begin(); p <= text.end(); p++) {
1390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    cap_[0] = p;
1400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (Visit(prog_->start(), p))  // Match must be leftmost; done.
1410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return true;
1420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
1430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return false;
1440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
1450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Explores from instruction ip at string position p looking for a match.
1470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Return true if found (so that caller can stop trying other possibilities).
1480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Backtracker::Visit(int id, const char* p) {
1490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Check bitmap.  If we've already explored from here,
1500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // either it didn't match or it did but we're hoping for a better match.
1510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Either way, don't go down that road again.
1520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK(p <= text_.end());
1530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int n = id*(text_.size()+1) + (p - text_.begin());
1540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  CHECK_LT(n/32, nvisited_);
1550d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (visited_[n/32] & (1 << (n&31)))
1560d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return false;
1570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  visited_[n/32] |= 1 << (n&31);
1580d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1590d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Pick out byte at current position.  If at end of string,
1600d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // have to explore in hope of finishing a match.  Use impossible byte -1.
1610d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  int c = -1;
1620d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (p < text_.end())
1630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    c = *p & 0xFF;
1640d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1650d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Prog::Inst* ip = prog_->inst(id);
1660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  switch (ip->opcode()) {
1670d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    default:
1680d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      LOG(FATAL) << "Unexpected opcode: " << (int)ip->opcode();
1690d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return false;  // not reached
1700d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1710d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstAlt:
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstAltMatch:
1730d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      // Try both possible next states: out is preferred to out1.
1740d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (Visit(ip->out(), p)) {
1750d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (longest_)
1760d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          Visit(ip->out1(), p);
1770d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return true;
1780d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1790d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return Visit(ip->out1(), p);
1800d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1810d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstByteRange:
1820d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (ip->Matches(c))
1830d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return Visit(ip->out(), p+1);
1840d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return false;
1850d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1860d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstCapture:
1870d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (0 <= ip->cap() && ip->cap() < arraysize(cap_)) {
1880d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        // Capture p to register, but save old value.
1890d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        const char* q = cap_[ip->cap()];
1900d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        cap_[ip->cap()] = p;
1910d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        bool ret = Visit(ip->out(), p);
1920d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        // Restore old value as we backtrack.
1930d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        cap_[ip->cap()] = q;
1940d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return ret;
1950d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
1960d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return Visit(ip->out(), p);
1970d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
1980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstEmptyWidth:
1990d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (ip->empty() & ~Prog::EmptyFlags(context_, p))
2000d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return false;
2010d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return Visit(ip->out(), p);
2020d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2030d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstNop:
2040d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return Visit(ip->out(), p);
2050d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2060d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstMatch:
2070d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      // We found a match.  If it's the best so far, record the
2080d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      // parameters in the caller's submatch_ array.
2090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (endmatch_ && p != context_.end())
2100d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        return false;
2110d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      cap_[1] = p;
2120d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      if (submatch_[0].data() == NULL ||           // First match so far ...
2130d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          (longest_ && p > submatch_[0].end())) {  // ... or better match
2140d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        for (int i = 0; i < nsubmatch_; i++)
2150d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin          submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]);
2160d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      }
2170d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return true;
2180d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2190d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    case kInstFail:
2200d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      return false;
2210d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2220d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2230d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2240d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin// Runs a backtracking search.
2250d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkinbool Prog::UnsafeSearchBacktrack(const StringPiece& text,
2260d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 const StringPiece& context,
2270d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 Anchor anchor,
2280d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 MatchKind kind,
2290d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 StringPiece* match,
2300d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin                                 int nmatch) {
2310d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // If full match, we ask for an anchored longest match
2320d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // and then check that match[0] == text.
2330d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // So make sure match[0] exists.
2340d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  StringPiece sp0;
2350d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (kind == kFullMatch) {
2360d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    anchor = kAnchored;
2370d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (nmatch < 1) {
2380d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      match = &sp0;
2390d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin      nmatch = 1;
2400d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    }
2410d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  }
2420d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2430d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  // Run the search.
2440d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  Backtracker b(this);
2450d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool anchored = anchor == kAnchored;
2460d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool longest = kind != kFirstMatch;
2470d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (!b.Search(text, context, anchored, longest, match, nmatch))
2480d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return false;
2490d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  if (kind == kFullMatch && match[0].end() != text.end())
2500d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    return false;
2510d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  return true;
2520d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}
2530d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin
2540d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin}  // namespace re2
255