re2-memory-optimization.patch revision c2e0dbddbe15c98d52c4786dac06cb8952a8ae6d
1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)diff --git a/re2/prefilter_tree.cc b/re2/prefilter_tree.cc 2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)--- a/re2/prefilter_tree.cc 3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+++ b/re2/prefilter_tree.cc 4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -107,21 +107,23 @@ void PrefilterTree::Compile(vector<string>* atom_vec) { 5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // not miss out on any regexps triggering by getting rid of a 6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // prefilter node. 7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < entries_.size(); i++) { 8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- IntMap* parents = entries_[i].parents; 9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ StdIntMap* parents = entries_[i].parents; 10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (parents->size() > 8) { 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // This one triggers too many things. If all the parents are AND 12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // nodes and have other things guarding them, then get rid of 13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // this trigger. TODO(vsri): Adjust the threshold appropriately, 14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // make it a function of total number of nodes? 15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) bool have_other_guard = true; 16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) 17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ for (StdIntMap::iterator it = parents->begin(); 18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ it != parents->end(); ++it) { 19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) have_other_guard = have_other_guard && 20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- (entries_[it->index()].propagate_up_at_count > 1); 21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ (entries_[it->first].propagate_up_at_count > 1); 22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ } 23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (have_other_guard) { 25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- for (IntMap::iterator it = parents->begin(); 26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ for (StdIntMap::iterator it = parents->begin(); 27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) it != parents->end(); ++it) 28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- entries_[it->index()].propagate_up_at_count -= 1; 29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ entries_[it->first].propagate_up_at_count -= 1; 30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) parents->clear(); // Forget the parents 32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -213,7 +215,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { 34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) entries_.resize(node_map_.size()); 36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- // Create parent IntMap for the entries. 38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ // Create parent StdIntMap for the entries. 39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = v.size() - 1; i >= 0; i--) { 40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Prefilter* prefilter = v[i]; 41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) if (prefilter == NULL) 42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -223,7 +225,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { 43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) continue; 44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Entry* entry = &entries_[prefilter->unique_id()]; 46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- entry->parents = new IntMap(node_map_.size()); 47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ entry->parents = new StdIntMap(); 48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Fill the entries. 51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -249,7 +251,7 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { 52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case Prefilter::OR: 54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) case Prefilter::AND: { 55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- IntMap uniq_child(node_map_.size()); 56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ std::set<int> uniq_child; 57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int j = 0; j < prefilter->subs()->size() ; j++) { 58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Prefilter* child = (*prefilter->subs())[j]; 59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Prefilter* canonical = CanonicalNode(child); 60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -258,12 +260,12 @@ void PrefilterTree::AssignUniqueIds(vector<string>* atom_vec) { 61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) return; 62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int child_id = canonical->unique_id(); 64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- if (!uniq_child.has_index(child_id)) 65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- uniq_child.set_new(child_id, 1); 66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ uniq_child.insert(child_id); 67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // To the child, we want to add to parent indices. 68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) Entry* child_entry = &entries_[child_id]; 69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- if (!child_entry->parents->has_index(prefilter->unique_id())) 70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- child_entry->parents->set_new(prefilter->unique_id(), 1); 71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ if (child_entry->parents->find(prefilter->unique_id()) == 72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ child_entry->parents->end()) 73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ (*child_entry->parents)[prefilter->unique_id()] = 1; 74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) entry->propagate_up_at_count = 76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) prefilter->op() == Prefilter::AND ? uniq_child.size() : 1; 77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -329,10 +331,10 @@ void PrefilterTree::PropagateMatch(const vector<int>& atom_ids, 78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) int c; 80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Pass trigger up to parents. 81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- for (IntMap::iterator it = entry.parents->begin(); 82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ for (StdIntMap::iterator it = entry.parents->begin(); 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) it != entry.parents->end(); 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) ++it) { 85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- int j = it->index(); 86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ int j = it->first; 87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const Entry& parent = entries_[j]; 88c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) VLOG(10) << " parent= " << j << " trig= " << parent.propagate_up_at_count; 89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Delay until all the children have succeeded. 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -364,12 +366,12 @@ void PrefilterTree::PrintDebugInfo() { 91c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) VLOG(10) << "#Unique Nodes: " << entries_.size(); 92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (int i = 0; i < entries_.size(); ++i) { 94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- IntMap* parents = entries_[i].parents; 95c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ StdIntMap* parents = entries_[i].parents; 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) const vector<int>& regexps = entries_[i].regexps; 97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) VLOG(10) << "EntryId: " << i 98c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) << " N: " << parents->size() << " R: " << regexps.size(); 99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- for (IntMap::iterator it = parents->begin(); it != parents->end(); ++it) 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- VLOG(10) << it->index(); 101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ for (StdIntMap::iterator it = parents->begin(); it != parents->end(); ++it) 102c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ VLOG(10) << it->first; 103c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) } 104c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) VLOG(10) << "Map:"; 105c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) for (map<string, Prefilter*>::const_iterator iter = node_map_.begin(); 106c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)diff --git a/re2/prefilter_tree.h b/re2/prefilter_tree.h 107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)--- a/re2/prefilter_tree.h 108c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+++ b/re2/prefilter_tree.h 109c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -16,12 +16,15 @@ 110c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #ifndef RE2_PREFILTER_TREE_H_ 111c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #define RE2_PREFILTER_TREE_H_ 112c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 113c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+#include <map> 114c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ 115c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #include "util/util.h" 116c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) #include "util/sparse_array.h" 117c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 118c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) namespace re2 { 119c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 120c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) typedef SparseArray<int> IntMap; 121c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+typedef std::map<int, int> StdIntMap; 122c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 123c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) class Prefilter; 124c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 125c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)@@ -71,7 +74,7 @@ class PrefilterTree { 126c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // are two different nodes, but they share the atom 'def'. So when 127c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // 'def' matches, it triggers two parents, corresponding to the two 128c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // different OR nodes. 129c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)- IntMap* parents; 130c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)+ StdIntMap* parents; 131c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 132c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // When this node is ready to trigger the parent, what are the 133c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // regexps that are triggered. 134