re2-memory-optimization.patch revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc
--- a/re2/prefilter_tree.cc
+++ b/re2/prefilter_tree.cc
@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) {
   // not miss out on any regexps triggering by getting rid of a
   // prefilter node.
   for (int i = 0; i < entries_.size(); i++) {
-    IntMap* parents = entries_[i].parents;
+    StdIntMap* parents = entries_[i].parents;
     if (parents->size() > 8) {
       // This one triggers too many things. If all the parents are AND
       // nodes and have other things guarding them, then get rid of
       // this trigger. TODO(vsri): Adjust the threshold appropriately,
       // make it a function of total number of nodes?
       bool have_other_guard = true;
-      for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+      for (StdIntMap::iterator it = parents->begin();
+           it != parents->end(); ++it) {
         have_other_guard = have_other_guard &&
-            (entries_[it->index()].propagate_up_at_count > 1);
+            (entries_[it->first].propagate_up_at_count > 1);
+      }
 
       if (have_other_guard) {
-        for (IntMap::iterator it = parents->begin();
+        for (StdIntMap::iterator it = parents->begin();
              it != parents->end(); ++it)
-          entries_[it->index()].propagate_up_at_count -= 1;
+          entries_[it->first].propagate_up_at_count -= 1;
 
         parents->clear(); // Forget the parents
       }
@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
   }
   entries_.resize(node_map_.size());
 
-  // Create parent IntMap for the entries.
+  // Create parent StdIntMap for the entries.
   for (int i = v.size() - 1; i >= 0; i--) {
     Prefilter* prefilter = v[i];
     if (prefilter == NULL)
@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
       continue;
 
     Entry* entry = &entries_[prefilter->unique_id()];
-    entry->parents = new IntMap(node_map_.size());
+    entry->parents = new StdIntMap();
   }
 
   // Fill the entries.
@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
 
       case Prefilter::OR:
       case Prefilter::AND: {
-        IntMap uniq_child(node_map_.size());
+        std::set<int> uniq_child;
         for (int j = 0; j < prefilter->subs()->size() ; j++) {
           Prefilter* child = (*prefilter->subs())[j];
           Prefilter* canonical = CanonicalNode(child);
@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) {
             return;
           }
           int child_id = canonical->unique_id();
-          if (!uniq_child.has_index(child_id))
-            uniq_child.set_new(child_id, 1);
+          uniq_child.insert(child_id);
           // To the child, we want to add to parent indices.
           Entry* child_entry = &entries_[child_id];
-          if (!child_entry->parents->has_index(prefilter->unique_id()))
-            child_entry->parents->set_new(prefilter->unique_id(), 1);
+          if (child_entry->parents->find(prefilter->unique_id()) ==
+              child_entry->parents->end())
+            (*child_entry->parents)[prefilter->unique_id()] = 1;
         }
         entry->propagate_up_at_count =
             prefilter->op() == Prefilter::AND ? uniq_child.size() : 1;
@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids,
     }
     int c;
     // Pass trigger up to parents.
-    for (IntMap::iterator it = entry.parents->begin();
+    for (StdIntMap::iterator it = entry.parents->begin();
          it != entry.parents->end();
          ++it) {
-      int j = it->index();
+      int j = it->first;
       const Entry& parent = entries_[j];
       VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count;
       // Delay until all the children have succeeded.
@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() {
   VLOG(10) << "#Unique Nodes: " << entries_.size();
 
   for (int i = 0; i < entries_.size(); ++i) {
-    IntMap* parents = entries_[i].parents;
+    StdIntMap* parents = entries_[i].parents;
     const vector<int>& regexps = entries_[i].regexps;
     VLOG(10) << "EntryId: " << i
             << " N: " << parents->size() << " R: " << regexps.size();
-    for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it)
-      VLOG(10) << it->index();
+    for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it)
+      VLOG(10) << it->first;
   }
   VLOG(10) << "Map:";
   for (map<string, Prefilter*>::const_iterator iter = node_map_.begin();
diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h
--- a/re2/prefilter_tree.h
+++ b/re2/prefilter_tree.h
@@ -16,12 +16,15 @@
 #ifndef RE2_PREFILTER_TREE_H_
 #define RE2_PREFILTER_TREE_H_
 
+#include <map>
+
 #include "util/util.h"
 #include "util/sparse_array.h"
 
 namespace re2 {
 
 typedef SparseArray<int> IntMap;
+typedef std::map<int, int> StdIntMap;
 
 class Prefilter;
 
@@ -71,7 +74,7 @@ class PrefilterTree {
     // are two different nodes, but they share the atom 'def'. So when
     // 'def' matches, it triggers two parents, corresponding to the two
     // different OR nodes.
-    IntMap* parents;
+    StdIntMap* parents;
 
     // When this node is ready to trigger the parent, what are the
     // regexps that are triggered.