1// Copyright 2009 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
// The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
// It provides a prefilter mechanism that helps in cutting down the
// number of regexps that need to be actually searched.
//
// By design, it does not include a string matching engine. This is to
// allow the user of the class to use their favorite string matching
// engine. The overall flow is: Add all the regexps using Add, then
// Compile the FilteredRE2. Compile returns the strings that need to
// be matched. Note that all returned strings are lowercase. To apply
// the regexps to a search text, the caller does the string matching
// using the strings returned, and must do so on a lowercased version
// of the search text. Then call FirstMatch or AllMatches with a vector
// of indices of the strings that were found in the text to get the
// actual regexp matches.
20
21#ifndef RE2_FILTERED_RE2_H_
22#define RE2_FILTERED_RE2_H_
23
24#include <vector>
25#include "re2/re2.h"
26
27namespace re2 {
28using std::vector;
29
30class PrefilterTree;
31
32class FilteredRE2 {
33 public:
34  FilteredRE2();
35  ~FilteredRE2();
36
37  // Uses RE2 constructor to create a RE2 object (re). Returns
38  // re->error_code(). If error_code is other than NoError, then re is
39  // deleted and not added to re2_vec_.
40  RE2::ErrorCode Add(const StringPiece& pattern,
41                     const RE2::Options& options,
42                     int *id);
43
44  // Prepares the regexps added by Add for filtering.  Returns a set
45  // of strings that the caller should check for in candidate texts.
46  // The returned strings are lowercased. When doing string matching,
47  // the search text should be lowercased first to find matching
48  // strings from the set of strings returned by Compile.  Call after
49  // all Add calls are done.
50  void Compile(vector<string>* strings_to_match);
51
52  // Returns the index of the first matching regexp.
53  // Returns -1 on no match. Can be called prior to Compile.
54  // Does not do any filtering: simply tries to Match the
55  // regexps in a loop.
56  int SlowFirstMatch(const StringPiece& text) const;
57
58  // Returns the index of the first matching regexp.
59  // Returns -1 on no match. Compile has to be called before
60  // calling this.
61  int FirstMatch(const StringPiece& text,
62                 const vector<int>& atoms) const;
63
64  // Returns the indices of all matching regexps, after first clearing
65  // matched_regexps.
66  bool AllMatches(const StringPiece& text,
67                  const vector<int>& atoms,
68                  vector<int>* matching_regexps) const;
69
70  // The number of regexps added.
71  int NumRegexps() const { return re2_vec_.size(); }
72
73 private:
74
75  // Get the individual RE2 objects. Useful for testing.
76  RE2* GetRE2(int regexpid) const { return re2_vec_[regexpid]; }
77
78  // Print prefilter.
79  void PrintPrefilter(int regexpid);
80
81  // Useful for testing and debugging.
82  void RegexpsGivenStrings(const vector<int>& matched_atoms,
83                           vector<int>* passed_regexps);
84
85  // All the regexps in the FilteredRE2.
86  vector<RE2*> re2_vec_;
87
88  // Has the FilteredRE2 been compiled using Compile()
89  bool compiled_;
90
91  // An AND-OR tree of string atoms used for filtering regexps.
92  PrefilterTree* prefilter_tree_;
93
94  //DISALLOW_EVIL_CONSTRUCTORS(FilteredRE2);
95  FilteredRE2(const FilteredRE2&);
96  void operator=(const FilteredRE2&);
97};
98
99}  // namespace re2
100
101#endif  // RE2_FILTERED_RE2_H_
102