filtered_re2.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
18bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// Copyright 2009 The RE2 Authors.  All Rights Reserved.
28bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// Use of this source code is governed by a BSD-style
38bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)// license that can be found in the LICENSE file.
48bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)
58bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include <string>
68bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "util/util.h"
78bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "re2/filtered_re2.h"
88bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)#include "re2/prefilter.h"
96d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)#include "re2/prefilter_tree.h"
1023730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)
116d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)namespace re2 {
12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
13effb81e5f8246d0db0270817048dc992db66e9fbBen MurdochFilteredRE2::FilteredRE2()
14effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    : compiled_(false),
156d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)      prefilter_tree_(new PrefilterTree()) {
166d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
178bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)
1823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)FilteredRE2::~FilteredRE2() {
19010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  for (int i = 0; i < re2_vec_.size(); i++)
208bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)    delete re2_vec_[i];
218bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  delete prefilter_tree_;
226d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
236d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
246d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
256d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)                                const RE2::Options& options, int* id) {
266d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)  RE2* re = new RE2(pattern, options);
275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  RE2::ErrorCode code = re->error_code();
285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!re->ok()) {
305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    LOG(ERROR) << "Couldn't compile regular expression, skipping: "
315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)               << re << " due to error " << re->error();
325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    delete re;
338bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  } else {
348bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)    *id = re2_vec_.size();
35effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    re2_vec_.push_back(re);
366d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)  }
37effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
388bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  return code;
395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void FilteredRE2::Compile(vector<string>* atoms) {
4203b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)  if (compiled_ || re2_vec_.size() == 0) {
4303b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)    LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
4403b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)    return;
458bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  }
468bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)
478bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  for (int i = 0; i < re2_vec_.size(); i++) {
48effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]);
49effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    prefilter_tree_->Add(prefilter);
50effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  }
51effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  atoms->clear();
52effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  prefilter_tree_->Compile(atoms);
536d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)  compiled_ = true;
546d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
556d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
56116680a4aac90f2aa7413d9095a592090648e557Ben Murdochint FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
57116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  for (int i = 0; i < re2_vec_.size(); i++)
586d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)    if (RE2::PartialMatch(text, *re2_vec_[i]))
596d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)      return i;
605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return -1;
615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
626d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)int FilteredRE2::FirstMatch(const StringPiece& text,
645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                            const vector<int>& atoms) const {
655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!compiled_) {
665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    LOG(DFATAL) << "FirstMatch called before Compile";
675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    return -1;
685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  vector<int> regexps;
705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
71effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  for (int i = 0; i < regexps.size(); i++)
72effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
73effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      return regexps[i];
74effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  return -1;
7523730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)}
7623730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)
7723730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)bool FilteredRE2::AllMatches(
7823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)    const StringPiece& text,
7923730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)    const vector<int>& atoms,
8023730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)    vector<int>* matching_regexps) const {
8123730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)  matching_regexps->clear();
8223730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)  vector<int> regexps;
83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  for (int i = 0; i < regexps.size(); i++)
85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      matching_regexps->push_back(regexps[i]);
87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  return !matching_regexps->empty();
886d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
896d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
906d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,
916d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)                                      vector<int>* passed_regexps) {
926d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
936d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
946d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
956d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
966d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)void FilteredRE2::PrintPrefilter(int regexpid) {
976d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)  prefilter_tree_->PrintPrefilter(regexpid);
986d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}
996d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)
1006d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)}  // namespace re2
1016d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles)