15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2009 The RE2 Authors. All Rights Reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h" 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/filtered_re2.h" 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prefilter.h" 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prefilter_tree.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 { 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FilteredRE2::FilteredRE2() 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : compiled_(false), 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_(new PrefilterTree()) { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FilteredRE2::~FilteredRE2() { 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < re2_vec_.size(); i++) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete re2_vec_[i]; 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete prefilter_tree_; 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RE2::Options& options, int* id) { 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2* re = new RE2(pattern, options); 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RE2::ErrorCode code = re->error_code(); 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!re->ok()) { 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (options.log_errors()) { 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LOG(ERROR) << "Couldn't compile regular expression, skipping: " 322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) << re << " due to error " << re->error(); 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delete re; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) *id = re2_vec_.size(); 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) re2_vec_.push_back(re); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return code; 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::Compile(vector<string>* atoms) { 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (compiled_ || re2_vec_.size() == 0) { 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size(); 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < re2_vec_.size(); i++) { 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->Add(prefilter); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) atoms->clear(); 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->Compile(atoms); 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) compiled_ = true; 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FilteredRE2::SlowFirstMatch(const StringPiece& text) const { 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < re2_vec_.size(); i++) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RE2::PartialMatch(text, *re2_vec_[i])) 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return i; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FilteredRE2::FirstMatch(const StringPiece& text, 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const vector<int>& atoms) const { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!compiled_) { 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) LOG(DFATAL) << "FirstMatch called before Compile"; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vector<int> regexps; 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < regexps.size(); i++) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return regexps[i]; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool FilteredRE2::AllMatches( 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const StringPiece& text, 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const vector<int>& atoms, 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vector<int>* matching_regexps) const { 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matching_regexps->clear(); 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vector<int> regexps; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int i = 0; i < regexps.size(); i++) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) matching_regexps->push_back(regexps[i]); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return !matching_regexps->empty(); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms, 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) vector<int>* passed_regexps) { 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::PrintPrefilter(int regexpid) { 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) prefilter_tree_->PrintPrefilter(regexpid); 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace re2 103