12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2010 The RE2 Authors. All Rights Reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/set.h" 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/stringpiece.h" 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/prog.h" 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/re2.h" 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "re2/regexp.h" 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonusing namespace re2; 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) { 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options_.Copy(options); 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson anchor_ = anchor; 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prog_ = NULL; 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson compiled_ = false; 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian HodsonRE2::Set::~Set() { 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < re_.size(); i++) 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_[i]->Decref(); 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete prog_; 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonint RE2::Set::Add(const StringPiece& pattern, string* error) { 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (compiled_) { 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "RE2::Set::Add after Compile"; 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return -1; 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options_.ParseFlags()); 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson RegexpStatus status; 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* re = Regexp::Parse(pattern, pf, &status); 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re == NULL) { 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (error != NULL) 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson *error = status.Text(); 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text(); 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return -1; 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Concatenate with match index and push on vector. 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int n = re_.size(); 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* m = re2::Regexp::HaveMatch(n, pf); 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re->op() == kRegexpConcat) { 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int nsub = re->nsub(); 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp** sub = new re2::Regexp*[nsub + 1]; 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < nsub; i++) 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sub[i] = re->sub()[i]->Incref(); 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sub[nsub] = m; 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re->Decref(); 572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re = re2::Regexp::Concat(sub, nsub + 1, pf); 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] sub; 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } else { 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* sub[2]; 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sub[0] = re; 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sub[1] = m; 632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re = re2::Regexp::Concat(sub, 2, pf); 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_.push_back(re); 662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return n; 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Set::Compile() { 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (compiled_) { 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "RE2::Set::Compile multiple times"; 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson compiled_ = true; 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>( 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson options_.ParseFlags()); 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]), 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_.size(), pf); 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re_.clear(); 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re2::Regexp* sre = re->Simplify(); 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re->Decref(); 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson re = sre; 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (re == NULL) { 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (options_.log_errors()) 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(ERROR) << "Error simplifying during Compile."; 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson prog_ = Prog::CompileSet(options_, anchor_, re); 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return prog_ != NULL; 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonbool RE2::Set::Match(const StringPiece& text, vector<int>* v) const { 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!compiled_) { 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "RE2::Set::Match without Compile"; 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 982ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson v->clear(); 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool failed; 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson Prog::kManyMatch, NULL, &failed, v); 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (failed) 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space"; 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (ret == false) 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (v->size() == 0) { 1092ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set"; 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return true; 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} 114