15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright 2009 The RE2 Authors.  All Rights Reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// license that can be found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Prefilter is the class used to extract string guards from regexps.
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Rather than using Prefilter class directly, use FilteredRE2.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// See filtered_re2.h
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef RE2_PREFILTER_H_
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define RE2_PREFILTER_H_
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "util/util.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace re2 {
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class RE2;
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Regexp;
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class Prefilter {
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum Op {
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ALL = 0,  // Everything matches
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NONE,  // Nothing matches
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ATOM,  // The string atom() must match
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AND,   // All in subs() must match
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OR,   // One of subs() must match
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  explicit Prefilter(Op op);
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~Prefilter();
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Op op() { return op_; }
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const string& atom() const { return atom_; }
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_unique_id(int id) { unique_id_ = id; }
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int unique_id() const { return unique_id_; }
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The children of the Prefilter node.
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<Prefilter*>* subs() {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CHECK(op_ == AND || op_ == OR);
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return subs_;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set the children vector. Prefilter takes ownership of subs and
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // subs_ will be deleted when Prefilter is deleted.
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_subs(vector<Prefilter*>* subs) { subs_ = subs; }
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Given a RE2, return a Prefilter. The caller takes ownership of
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the Prefilter and should deallocate it. Returns NULL if Prefilter
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // cannot be formed.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* FromRE2(const RE2* re2);
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns a readable debug string of the prefilter.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string DebugString() const;
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class Info;
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Combines two prefilters together to create an AND. The passed
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Prefilters will be part of the returned Prefilter or deleted.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* And(Prefilter* a, Prefilter* b);
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Combines two prefilters together to create an OR. The passed
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Prefilters will be part of the returned Prefilter or deleted.
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* Or(Prefilter* a, Prefilter* b);
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Generalized And/Or
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* FromRegexp(Regexp* a);
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* FromString(const string& str);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Prefilter* OrStrings(set<string>* ss);
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static Info* BuildInfo(Regexp* re);
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Prefilter* Simplify();
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Kind of Prefilter.
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Op op_;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Sub-matches for AND or OR Prefilter.
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  vector<Prefilter*>* subs_;
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Actual string to match in leaf node.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string atom_;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If different prefilters have the same string atom, or if they are
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // structurally the same (e.g., OR of same atom strings) they are
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // considered the same unique nodes. This is the id for each unique
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // node. This field is populated with a unique id for every node,
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and -1 for duplicate nodes.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int unique_id_;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Used for debugging, helps in tracking memory leaks.
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int alloc_id_;
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_EVIL_CONSTRUCTORS(Prefilter);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace re2
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // RE2_PREFILTER_H_
106