re2-memory-optimization.patch revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
11320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccidiff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
21320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci--- a/re2/prefilter_tree.cc
31320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+++ b/re2/prefilter_tree.cc
41320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) {
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   // not miss out on any regexps triggering by getting rid of a
61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   // prefilter node.
71320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   for (int i = 0; i < entries_.size(); i++) {
81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-    IntMap* parents = entries_[i].parents;
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+    StdIntMap* parents = entries_[i].parents;
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci     if (parents->size() > 8) {
111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       // This one triggers too many things. If all the parents are AND
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       // nodes and have other things guarding them, then get rid of
131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       // this trigger. TODO(vsri): Adjust the threshold appropriately,
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       // make it a function of total number of nodes?
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       bool have_other_guard = true;
161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-      for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+      for (StdIntMap::iterator it = parents->begin();
181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+           it != parents->end(); ++it) {
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         have_other_guard = have_other_guard &&
201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-            (entries_[it->index()].propagate_up_at_count > 1);
211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+            (entries_[it->first].propagate_up_at_count > 1);
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+      }
231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       if (have_other_guard) {
251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-        for (IntMap::iterator it = parents->begin();
261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+        for (StdIntMap::iterator it = parents->begin();
271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci              it != parents->end(); ++it)
281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-          entries_[it->index()].propagate_up_at_count -= 1;
291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+          entries_[it->first].propagate_up_at_count -= 1;
301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 
311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci         parents->clear();  // Forget the parents
321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci       }
331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   }
351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   entries_.resize(node_map_.size());
361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 
371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci-  // Create parent IntMap for the entries.
381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci+  // Create parent StdIntMap for the entries.
391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci   for (int i = v.size()  - 1; i >= 0; i--) {
401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci     Prefilter* prefilter = v[i];
411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci     if (prefilter == NULL)
42@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
43       continue;
44 
45     Entry* entry = &entries_[prefilter->unique_id()];
46-    entry->parents = new IntMap(node_map_.size());
47+    entry->parents = new StdIntMap();
48   }
49 
50   // Fill the entries.
51@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
52 
53       case Prefilter::OR:
54       case Prefilter::AND: {
55-        IntMap uniq_child(node_map_.size());
56+        std::set<int> uniq_child;
57         for (int j = 0; j < prefilter->subs()->size() ; j++) {
58           Prefilter* child = (*prefilter->subs())[j];
59           Prefilter* canonical = CanonicalNode(child);
60@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
61             return;
62           }
63           int child_id = canonical->unique_id();
64-          if (!uniq_child.has_index(child_id))
65-            uniq_child.set_new(child_id, 1);
66+          uniq_child.insert(child_id);
67           // To the child, we want to add to parent indices.
68           Entry* child_entry = &entries_[child_id];
69-          if (!child_entry->parents->has_index(prefilter->unique_id()))
70-            child_entry->parents->set_new(prefilter->unique_id(), 1);
71+          if (child_entry->parents->find(prefilter->unique_id()) ==
72+              child_entry->parents->end())
73+            (*child_entry->parents)[prefilter->unique_id()] = 1;
74         }
75         entry->propagate_up_at_count =
76             prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
77@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
78     }
79     int c;
80     // Pass trigger up to parents.
81-    for (IntMap::iterator it = entry.parents->begin();
82+    for (StdIntMap::iterator it = entry.parents->begin();
83          it != entry.parents->end();
84          ++it) {
85-      int j = it->index();
86+      int j = it->first;
87       const Entry& parent = entries_[j];
88       VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
89       // Delay until all the children have succeeded.
90@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() {
91   VLOG(10) << "#Unique Nodes: " << entries_.size();
92 
93   for (int i = 0; i < entries_.size(); ++i) {
94-    IntMap* parents = entries_[i].parents;
95+    StdIntMap* parents = entries_[i].parents;
96     const vector<int>& regexps = entries_[i].regexps;
97     VLOG(10) << "EntryId: " << i
98             << " N: " << parents->size() << " R: " << regexps.size();
99-    for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
100-      VLOG(10) << it->index();
101+    for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
102+      VLOG(10) << it->first;
103   }
104   VLOG(10) << "Map:";
105   for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
106diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
107--- a/re2/prefilter_tree.h
108+++ b/re2/prefilter_tree.h
109@@ -16,12 +16,15 @@
110 #ifndef RE2_PREFILTER_TREE_H_
111 #define RE2_PREFILTER_TREE_H_
112 
113+#include <map>
114+
115 #include "util/util.h"
116 #include "util/sparse_array.h"
117 
118 namespace re2 {
119 
120 typedef SparseArray<int> IntMap;
121+typedef std::map<int, int> StdIntMap;
122 
123 class Prefilter;
124 
125@@ -71,7 +74,7 @@ class PrefilterTree {
126     // are two different nodes, but they share the atom 'def'. So when
127     // 'def' matches, it triggers two parents, corresponding to the two
128     // different OR nodes.
129-    IntMap* parents;
130+    StdIntMap* parents;
131 
132     // When this node is ready to trigger the parent, what are the
133     // regexps that are triggered.
134