1// Copyright 2009 The RE2 Authors.  All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Prefilter is the class used to extract string guards from regexps.
6// Rather than using Prefilter class directly, use FilteredRE2.
7// See filtered_re2.h
8
9#ifndef RE2_PREFILTER_H_
10#define RE2_PREFILTER_H_
11
12#include "util/util.h"
13
14namespace re2 {
15
16class RE2;
17
18class Regexp;
19
20class Prefilter {
21  // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
22 public:
23  enum Op {
24    ALL = 0,  // Everything matches
25    NONE,  // Nothing matches
26    ATOM,  // The string atom() must match
27    AND,   // All in subs() must match
28    OR,   // One of subs() must match
29  };
30
31  explicit Prefilter(Op op);
32  ~Prefilter();
33
34  Op op() { return op_; }
35  const string& atom() const { return atom_; }
36  void set_unique_id(int id) { unique_id_ = id; }
37  int unique_id() const { return unique_id_; }
38
39  // The children of the Prefilter node.
40  vector<Prefilter*>* subs() {
41    CHECK(op_ == AND || op_ == OR);
42    return subs_;
43  }
44
45  // Set the children vector. Prefilter takes ownership of subs and
46  // subs_ will be deleted when Prefilter is deleted.
47  void set_subs(vector<Prefilter*>* subs) { subs_ = subs; }
48
49  // Given a RE2, return a Prefilter. The caller takes ownership of
50  // the Prefilter and should deallocate it. Returns NULL if Prefilter
51  // cannot be formed.
52  static Prefilter* FromRE2(const RE2* re2);
53
54  // Returns a readable debug string of the prefilter.
55  string DebugString() const;
56
57 private:
58  class Info;
59
60  // Combines two prefilters together to create an AND. The passed
61  // Prefilters will be part of the returned Prefilter or deleted.
62  static Prefilter* And(Prefilter* a, Prefilter* b);
63
64  // Combines two prefilters together to create an OR. The passed
65  // Prefilters will be part of the returned Prefilter or deleted.
66  static Prefilter* Or(Prefilter* a, Prefilter* b);
67
68  // Generalized And/Or
69  static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
70
71  static Prefilter* FromRegexp(Regexp* a);
72
73  static Prefilter* FromString(const string& str);
74
75  static Prefilter* OrStrings(set<string>* ss);
76
77  static Info* BuildInfo(Regexp* re);
78
79  Prefilter* Simplify();
80
81  // Kind of Prefilter.
82  Op op_;
83
84  // Sub-matches for AND or OR Prefilter.
85  vector<Prefilter*>* subs_;
86
87  // Actual string to match in leaf node.
88  string atom_;
89
90  // If different prefilters have the same string atom, or if they are
91  // structurally the same (e.g., OR of same atom strings) they are
92  // considered the same unique nodes. This is the id for each unique
93  // node. This field is populated with a unique id for every node,
94  // and -1 for duplicate nodes.
95  int unique_id_;
96
97  // Used for debugging, helps in tracking memory leaks.
98  int alloc_id_;
99
100  DISALLOW_EVIL_CONSTRUCTORS(Prefilter);
101};
102
103}  // namespace re2
104
105#endif  // RE2_PREFILTER_H_
106