re2-memory-optimization.patch revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)--- a/re2/prefilter_tree.cc
3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+++ b/re2/prefilter_tree.cc
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) {
5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   // not miss out on any regexps triggering by getting rid of a
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   // prefilter node.
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   for (int i = 0; i < entries_.size(); i++) {
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    IntMap* parents = entries_[i].parents;
9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    StdIntMap* parents = entries_[i].parents;
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     if (parents->size() > 8) {
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       // This one triggers too many things. If all the parents are AND
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       // nodes and have other things guarding them, then get rid of
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       // this trigger. TODO(vsri): Adjust the threshold appropriately,
14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       // make it a function of total number of nodes?
15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       bool have_other_guard = true;
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-      for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+      for (StdIntMap::iterator it = parents->begin();
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+           it != parents->end(); ++it) {
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         have_other_guard = have_other_guard &&
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-            (entries_[it->index()].propagate_up_at_count > 1);
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+            (entries_[it->first].propagate_up_at_count > 1);
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+      }
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       if (have_other_guard) {
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-        for (IntMap::iterator it = parents->begin();
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+        for (StdIntMap::iterator it = parents->begin();
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)              it != parents->end(); ++it)
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-          entries_[it->index()].propagate_up_at_count -= 1;
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+          entries_[it->first].propagate_up_at_count -= 1;
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         parents->clear();  // Forget the parents
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       }
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   }
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   entries_.resize(node_map_.size());
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-  // Create parent IntMap for the entries.
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+  // Create parent StdIntMap for the entries.
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   for (int i = v.size()  - 1; i >= 0; i--) {
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     Prefilter* prefilter = v[i];
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     if (prefilter == NULL)
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       continue;
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     Entry* entry = &entries_[prefilter->unique_id()];
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    entry->parents = new IntMap(node_map_.size());
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    entry->parents = new StdIntMap();
48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   }
49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   // Fill the entries.
51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       case Prefilter::OR:
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       case Prefilter::AND: {
55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-        IntMap uniq_child(node_map_.size());
56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+        std::set<int> uniq_child;
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         for (int j = 0; j < prefilter->subs()->size() ; j++) {
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           Prefilter* child = (*prefilter->subs())[j];
59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           Prefilter* canonical = CanonicalNode(child);
60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)             return;
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           }
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           int child_id = canonical->unique_id();
64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-          if (!uniq_child.has_index(child_id))
65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-            uniq_child.set_new(child_id, 1);
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+          uniq_child.insert(child_id);
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           // To the child, we want to add to parent indices.
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)           Entry* child_entry = &entries_[child_id];
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-          if (!child_entry->parents->has_index(prefilter->unique_id()))
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-            child_entry->parents->set_new(prefilter->unique_id(), 1);
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+          if (child_entry->parents->find(prefilter->unique_id()) ==
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+              child_entry->parents->end())
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+            (*child_entry->parents)[prefilter->unique_id()] = 1;
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         }
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)         entry->propagate_up_at_count =
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)             prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     }
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     int c;
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // Pass trigger up to parents.
81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    for (IntMap::iterator it = entry.parents->begin();
82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    for (StdIntMap::iterator it = entry.parents->begin();
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          it != entry.parents->end();
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)          ++it) {
85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-      int j = it->index();
86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+      int j = it->first;
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       const Entry& parent = entries_[j];
88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)       // Delay until all the children have succeeded.
90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() {
91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   VLOG(10) << "#Unique Nodes: " << entries_.size();
92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   for (int i = 0; i < entries_.size(); ++i) {
94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    IntMap* parents = entries_[i].parents;
95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    StdIntMap* parents = entries_[i].parents;
96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     const vector<int>& regexps = entries_[i].regexps;
97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     VLOG(10) << "EntryId: " << i
98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)             << " N: " << parents->size() << " R: " << regexps.size();
99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-      VLOG(10) << it->index();
101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+      VLOG(10) << it->first;
103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   }
104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   VLOG(10) << "Map:";
105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)   for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)--- a/re2/prefilter_tree.h
108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+++ b/re2/prefilter_tree.h
109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -16,12 +16,15 @@
110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #ifndef RE2_PREFILTER_TREE_H_
111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #define RE2_PREFILTER_TREE_H_
112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+#include <map>
114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+
115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #include "util/util.h"
116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #include "util/sparse_array.h"
117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) namespace re2 {
119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) typedef SparseArray<int> IntMap;
121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+typedef std::map<int, int> StdIntMap;
122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) class Prefilter;
124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -71,7 +74,7 @@ class PrefilterTree {
126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // are two different nodes, but they share the atom 'def'. So when
127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // 'def' matches, it triggers two parents, corresponding to the two
128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // different OR nodes.
129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)-    IntMap* parents;
130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+    StdIntMap* parents;
131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 
132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // When this node is ready to trigger the parent, what are the
133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)     // regexps that are triggered.
134