filtered_re2.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
18bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// Copyright 2009 The RE2 Authors. All Rights Reserved. 28bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// Use of this source code is governed by a BSD-style 38bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// license that can be found in the LICENSE file. 48bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) 58bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include <string> 68bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "util/util.h" 78bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "re2/filtered_re2.h" 88bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "re2/prefilter.h" 96d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)#include "re2/prefilter_tree.h" 1023730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) 116d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)namespace re2 { 12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 13effb81e5f8246d0db0270817048dc992db66e9fbBen MurdochFilteredRE2::FilteredRE2() 14effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch : compiled_(false), 156d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) prefilter_tree_(new PrefilterTree()) { 166d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 178bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) 1823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)FilteredRE2::~FilteredRE2() { 19010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (int i = 0; i < re2_vec_.size(); i++) 208bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) delete re2_vec_[i]; 218bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) delete prefilter_tree_; 226d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 236d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 246d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern, 256d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) const RE2::Options& options, int* id) { 266d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) RE2* re = new RE2(pattern, options); 275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) RE2::ErrorCode code = re->error_code(); 285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!re->ok()) { 305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) LOG(ERROR) << "Couldn't compile regular expression, skipping: " 315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) << re << " due to error " << re->error(); 325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) delete re; 338bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) } else { 348bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) *id = re2_vec_.size(); 35effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch re2_vec_.push_back(re); 366d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) } 37effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch 388bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) return code; 395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void FilteredRE2::Compile(vector<string>* atoms) { 4203b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles) if (compiled_ || re2_vec_.size() == 0) { 4303b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles) LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size(); 4403b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles) return; 458bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) } 468bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) 478bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) for (int i = 0; i < re2_vec_.size(); i++) { 48effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]); 49effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch prefilter_tree_->Add(prefilter); 50effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch } 51effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch atoms->clear(); 52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch prefilter_tree_->Compile(atoms); 536d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) compiled_ = true; 546d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 556d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 56116680a4aac90f2aa7413d9095a592090648e557Ben Murdochint FilteredRE2::SlowFirstMatch(const StringPiece& text) const { 57116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch for (int i = 0; i < re2_vec_.size(); i++) 586d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) if (RE2::PartialMatch(text, *re2_vec_[i])) 596d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) return i; 605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) return -1; 615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)} 626d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)int FilteredRE2::FirstMatch(const StringPiece& text, 645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const vector<int>& atoms) const { 655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!compiled_) { 665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) LOG(DFATAL) << "FirstMatch called before Compile"; 675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return -1; 685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) vector<int> regexps; 705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 71effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch for (int i = 0; i < regexps.size(); i++) 72effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 73effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return regexps[i]; 74effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return -1; 7523730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)} 7623730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) 7723730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)bool FilteredRE2::AllMatches( 7823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) const StringPiece& text, 7923730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) const vector<int>& atoms, 8023730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) vector<int>* matching_regexps) const { 8123730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) matching_regexps->clear(); 8223730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) vector<int> regexps; 83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(atoms, ®exps); 84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) for (int i = 0; i < regexps.size(); i++) 85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) if (RE2::PartialMatch(text, *re2_vec_[regexps[i]])) 86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) matching_regexps->push_back(regexps[i]); 87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) return !matching_regexps->empty(); 886d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 896d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 906d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms, 916d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) vector<int>* passed_regexps) { 926d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps); 936d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 946d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 956d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 966d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)void FilteredRE2::PrintPrefilter(int regexpid) { 976d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) prefilter_tree_->PrintPrefilter(regexpid); 986d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} 996d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) 1006d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)} // namespace re2 1016d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)