// Copyright 2009 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/filtered_re2.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prefilter.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "re2/prefilter_tree.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FilteredRE2::FilteredRE2()
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : compiled_(false),
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      prefilter_tree_(new PrefilterTree()) {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)FilteredRE2::~FilteredRE2() {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < re2_vec_.size(); i++)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete re2_vec_[i];
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  delete prefilter_tree_;
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)RE2::ErrorCode FilteredRE2::Add(const StringPiece& pattern,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                const RE2::Options& options, int* id) {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RE2* re = new RE2(pattern, options);
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  RE2::ErrorCode code = re->error_code();
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!re->ok()) {
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (options.log_errors()) {
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      LOG(ERROR) << "Couldn't compile regular expression, skipping: "
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                 << re << " due to error " << re->error();
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete re;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *id = re2_vec_.size();
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    re2_vec_.push_back(re);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return code;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::Compile(vector<string>* atoms) {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (compiled_ || re2_vec_.size() == 0) {
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG(INFO) << "C: " << compiled_ << " S:" << re2_vec_.size();
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < re2_vec_.size(); i++) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Prefilter* prefilter = Prefilter::FromRE2(re2_vec_[i]);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prefilter_tree_->Add(prefilter);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  atoms->clear();
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefilter_tree_->Compile(atoms);
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  compiled_ = true;
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FilteredRE2::SlowFirstMatch(const StringPiece& text) const {
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < re2_vec_.size(); i++)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RE2::PartialMatch(text, *re2_vec_[i]))
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return i;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return -1;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int FilteredRE2::FirstMatch(const StringPiece& text,
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const vector<int>& atoms) const {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!compiled_) {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    LOG(DFATAL) << "FirstMatch called before Compile";
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return -1;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<int> regexps;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < regexps.size(); i++)
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return regexps[i];
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return -1;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool FilteredRE2::AllMatches(
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const StringPiece& text,
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const vector<int>& atoms,
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    vector<int>* matching_regexps) const {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  matching_regexps->clear();
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<int> regexps;
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(atoms, &regexps);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < regexps.size(); i++)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (RE2::PartialMatch(text, *re2_vec_[regexps[i]]))
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      matching_regexps->push_back(regexps[i]);
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return !matching_regexps->empty();
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::RegexpsGivenStrings(const vector<int>& matched_atoms,
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      vector<int>* passed_regexps) {
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefilter_tree_->RegexpsGivenStrings(matched_atoms, passed_regexps);
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void FilteredRE2::PrintPrefilter(int regexpid) {
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefilter_tree_->PrintPrefilter(regexpid);
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
103