15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2008 The RE2 Authors. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Tested by search_test.cc, exhaustive_test.cc, tester.cc 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prog::SearchBitState is a regular expression search with submatch 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// tracking for small regular expressions and texts. Like 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// testing/backtrack.cc, it allocates a bit vector with (length of 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// text) * (length of prog) bits, to make sure it never explores the 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// same (character position, instruction) state multiple times. This 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// limits the search to run in time linear in the length of the text. 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Unlike testing/backtrack.cc, SearchBitState is not recursive 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// on the text. 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SearchBitState is a fast replacement for the NFA code on small 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// regexps and texts when SearchOnePass cannot be used. 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prog.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/regexp.h" 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 { 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct Job { 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id; 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int arg; 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* p; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BitState { 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit BitState(Prog* prog); 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~BitState(); 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The usual Search prototype. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Can only call Search once per BitState. 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Search(const StringPiece& text, const StringPiece& context, 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch); 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline bool ShouldVisit(int id, const char* p); 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void Push(int id, const char* p, int arg); 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool GrowStack(); 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool TrySearch(int id, const char* p); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Search parameters 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog* prog_; // program being run 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece text_; // text being searched 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece context_; // greater context of text being searched 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored_; // whether search is anchored at text.begin() 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest_; // whether search wants leftmost-longest match 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool endmatch_; // whether match must end at text.end() 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece *submatch_; // submatches to fill in 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nsubmatch_; // # of submatches to fill in 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Search state 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char** cap_; // capture registers 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int ncap_; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int VisitedBits = 32; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint32 *visited_; // bitmap: (Inst*, char*) pairs already backtracked 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nvisited_; // # of words in bitmap 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Job *job_; // stack of text positions to explore 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int njob_; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int maxjob_; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BitState::BitState(Prog* prog) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : prog_(prog), 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchored_(false), 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_(false), 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_(false), 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_(NULL), 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nsubmatch_(0), 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_(NULL), 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncap_(0), 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_(NULL), 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvisited_(0), 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) job_(NULL), 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) njob_(0), 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxjob_(0) { 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BitState::~BitState() { 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] visited_; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] job_; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] cap_; 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Should the search visit the pair ip, p? 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// If so, remember that it was visited so that the next time, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// we don't repeat the visit. 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool BitState::ShouldVisit(int id, const char* p) { 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint n = id * (text_.size() + 1) + (p - text_.begin()); 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (visited_[n/VisitedBits] & (1 << (n & (VisitedBits-1)))) 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_[n/VisitedBits] |= 1 << (n & (VisitedBits-1)); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Grow the stack. 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool BitState::GrowStack() { 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // VLOG(0) << "Reallocate."; 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxjob_ *= 2; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Job* newjob = new Job[maxjob_]; 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memmove(newjob, job_, njob_*sizeof job_[0]); 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete[] job_; 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) job_ = newjob; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (njob_ >= maxjob_) { 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Job stack overflow."; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Push the triple (id, p, arg) onto the stack, growing it if necessary. 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void BitState::Push(int id, const char* p, int arg) { 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (njob_ >= maxjob_) { 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!GrowStack()) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int op = prog_->inst(id)->opcode(); 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (op == kInstFail) 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Only check ShouldVisit when arg == 0. 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // When arg > 0, we are continuing a previous visit. 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (arg == 0 && !ShouldVisit(id, p)) 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Job* j = &job_[njob_++]; 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) j->id = id; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) j->p = p; 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) j->arg = arg; 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Try a search from instruction id0 in state p0. 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Return whether it succeeded. 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool BitState::TrySearch(int id0, const char* p0) { 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool matched = false; 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* end = text_.end(); 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) njob_ = 0; 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Push(id0, p0, 0); 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (njob_ > 0) { 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Pop job off stack. 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) --njob_; 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id = job_[njob_].id; 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char* p = job_[njob_].p; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int arg = job_[njob_].arg; 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Optimization: rather than push and pop, 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // code that is going to Push and continue 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the loop simply updates ip, p, and arg 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and jumps to CheckAndLoop. We have to 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // do the ShouldVisit check that Push 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // would have, but we avoid the stack 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // manipulation. 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (0) { 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CheckAndLoop: 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!ShouldVisit(id, p)) 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Visit ip, p. 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // VLOG(0) << "Job: " << ip->id() << " " 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // << (p - text_.begin()) << " " << arg; 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prog::Inst* ip = prog_->inst(id); 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (ip->opcode()) { 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstFail: 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Unexpected opcode: " << ip->opcode() << " arg " << arg; 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAlt: 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Cannot just 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Push(ip->out1(), p, 0); 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Push(ip->out(), p, 0); 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If, during the processing of ip->out(), we encounter 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ip->out1() via another path, we want to process it then. 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Pushing it here will inhibit that. Instead, re-push 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ip with arg==1 as a reminder to push ip->out1() later. 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (arg) { 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 0: 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Push(id, p, 1); // come back when we're done 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 1: 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Finished ip->out(); try ip->out1(). 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) arg = 0; 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out1(); 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstAltMatch: 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // One opcode is byte range; the other leads to match. 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->greedy(prog_)) { 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // out1 is the match 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Push(ip->out1(), p, 0); 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out1(); 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p = end; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // out is the match - non-greedy 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Push(ip->out(), end, 0); 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstByteRange: { 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int c = -1; 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p < end) 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) c = *p & 0xFF; 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->Matches(c)) { 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) p++; 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstCapture: 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (arg) { 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 0: 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (0 <= ip->cap() && ip->cap() < ncap_) { 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Capture p to register, but save old value. 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Push(id, cap_[ip->cap()], 1); // come back when we're done 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[ip->cap()] = p; 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Continue on. 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 1: 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Finished ip->out(); restore the old value. 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[ip->cap()] = p; 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "Bad arg in kInstCapture: " << arg; 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstEmptyWidth: 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ip->empty() & ~Prog::EmptyFlags(context_, p)) 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstNop: 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) id = ip->out(); 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) goto CheckAndLoop; 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case kInstMatch: { 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (endmatch_ && p != text_.end()) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // VLOG(0) << "Found match."; 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We found a match. If the caller doesn't care 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // where the match is, no point going further. 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nsubmatch_ == 0) 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Record best match so far. 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Only need to check end point, because this entire 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // call is only considering one start position. 2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matched = true; 2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[1] = p; 2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (submatch_[0].data() == NULL || 2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (longest_ && p > submatch_[0].end())) { 2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < nsubmatch_; i++) 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_[i] = StringPiece(cap_[2*i], cap_[2*i+1] - cap_[2*i]); 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If going for first match, we're done. 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!longest_) 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we used the entire text, no longer match is possible. 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (p == text_.end()) 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Otherwise, continue on in hope of a longer match. 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) continue; 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return matched; 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Search text (within context) for prog_. 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool BitState::Search(const StringPiece& text, const StringPiece& context, 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored, bool longest, 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* submatch, int nsubmatch) { 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Search parameters. 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) text_ = text; 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) context_ = context; 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (context_.begin() == NULL) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) context_ = text; 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_start() && context_.begin() != text.begin()) 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (prog_->anchor_end() && context_.end() != text.end()) 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchored_ = anchored || prog_->anchor_start(); 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) longest_ = longest || prog_->anchor_end(); 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) endmatch_ = prog_->anchor_end(); 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_ = submatch; 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nsubmatch_ = nsubmatch; 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < nsubmatch_; i++) 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) submatch_[i] = NULL; 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Allocate scratch space. 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nvisited_ = (prog_->size() * (text.size()+1) + VisitedBits-1) / VisitedBits; 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) visited_ = new uint32[nvisited_]; 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(visited_, 0, nvisited_*sizeof visited_[0]); 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // VLOG(0) << "nvisited_ = " << nvisited_; 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncap_ = 2*nsubmatch; 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (ncap_ < 2) 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ncap_ = 2; 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_ = new const char*[ncap_]; 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memset(cap_, 0, ncap_*sizeof cap_[0]); 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) maxjob_ = 256; 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) job_ = new Job[maxjob_]; 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Anchored search must start at text.begin(). 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (anchored_) { 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[0] = text.begin(); 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return TrySearch(prog_->start(), text.begin()); 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unanchored search, starting from each possible text position. 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Notice that we have to try the empty string at the end of 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the text, so the loop condition is p <= text.end(), not p < text.end(). 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This looks like it's quadratic in the size of the text, 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // but we are not clearing visited_ between calls to TrySearch, 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // so no work is duplicated and it ends up still being linear. 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (const char* p = text.begin(); p <= text.end(); p++) { 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) cap_[0] = p; 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Bit-state search. 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool Prog::SearchBitState(const StringPiece& text, 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& context, 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Anchor anchor, 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MatchKind kind, 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece* match, 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int nmatch) { 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If full match, we ask for an anchored longest match 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and then check that match[0] == text. 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // So make sure match[0] exists. 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringPiece sp0; 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch) { 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) anchor = kAnchored; 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (nmatch < 1) { 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match = &sp0; 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) nmatch = 1; 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Run the search. 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BitState b(this); 3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool anchored = anchor == kAnchored; 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool longest = kind != kFirstMatch; 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!b.Search(text, context, anchored, longest, match, nmatch)) 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (kind == kFullMatch && match[0].end() != text.end()) 3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 379