12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2006 The RE2 Authors. All Rights Reserved. 22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style 32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file. 42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// DESCRIPTION 62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// SparseSet<T>(m) is a set of integers in [0, m). 82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// It requires sizeof(int)*m memory, but it provides 92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// fast iteration through the elements in the set and fast clearing 102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// of the set. 112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Insertion and deletion are constant time operations. 132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Allocating the set is a constant time operation 152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// when memory allocation is a constant time operation. 162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Clearing the set is a constant time operation (unusual!). 182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Iterating through the set is an O(n) operation, where n 202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// is the number of items in the set (not O(m)). 212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The set iterator visits entries in the order they were first 232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// inserted into the array. It is safe to add items to the set while 242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// using an iterator: the iterator will visit indices added to the set 252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// during the iteration, but will not re-visit indices whose values 262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// change after visiting. Thus SparseSet can be a convenient 272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// implementation of a work queue. 282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The SparseSet implementation is NOT thread-safe. It is up to the 302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// caller to make sure only one thread is accessing the set. (Typically 312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// these sets are temporary values and used in situations where speed is 322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// important.) 332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The SparseSet interface does not present all the usual STL bells and 352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// whistles. 362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Implemented with reference to Briggs & Torczon, An Efficient 382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Representation for Sparse Sets, ACM Letters on Programming Languages 392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69. 402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For a generalization to sparse array, see sparse_array.h. 422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// IMPLEMENTATION 442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// 452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// See sparse_array.h for implementation details 462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#ifndef RE2_UTIL_SPARSE_SET_H__ 482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define RE2_UTIL_SPARSE_SET_H__ 492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h" 512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 { 532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass SparseSet { 552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public: 562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SparseSet() 570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), valgrind_(RunningOnValgrind()) {} 582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson SparseSet(int max_size) { 602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_size_ = max_size; 612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sparse_to_dense_ = new int[max_size]; 622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dense_ = new int[max_size]; 630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin valgrind_ = RunningOnValgrind(); 642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Don't need to zero the memory, but do so anyway 652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // to appease Valgrind. 660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (valgrind_) { 672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = 0; i < max_size; i++) { 682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dense_[i] = 0xababababU; 692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sparse_to_dense_[i] = 0xababababU; 702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson size_ = 0; 732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson ~SparseSet() { 762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] sparse_to_dense_; 772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] dense_; 782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson typedef int* iterator; 812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson typedef const int* const_iterator; 822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int size() const { return size_; } 842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson iterator begin() { return dense_; } 852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson iterator end() { return dense_ + size_; } 862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const_iterator begin() const { return dense_; } 872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson const_iterator end() const { return dense_ + size_; } 882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Change the maximum size of the array. 902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Invalidates all iterators. 912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void resize(int new_max_size) { 922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (size_ > new_max_size) 932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson size_ = new_max_size; 942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (new_max_size > max_size_) { 952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* a = new int[new_max_size]; 962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (sparse_to_dense_) { 972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memmove(a, sparse_to_dense_, max_size_*sizeof a[0]); 980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (valgrind_) { 992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = max_size_; i < new_max_size; i++) 1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a[i] = 0xababababU; 1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] sparse_to_dense_; 1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sparse_to_dense_ = a; 1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a = new int[new_max_size]; 1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (dense_) { 1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson memmove(a, dense_, size_*sizeof a[0]); 1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin if (valgrind_) { 1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson for (int i = size_; i < new_max_size; i++) 1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson a[i] = 0xababababU; 1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson delete[] dense_; 1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dense_ = a; 1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson max_size_ = new_max_size; 1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Return the maximum size of the array. 1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Indices can be in the range [0, max_size). 1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int max_size() const { return max_size_; } 1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Clear the array. 1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void clear() { size_ = 0; } 1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Check whether i is in the array. 1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson bool contains(int i) const { 1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK_GE(i, 0); 1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK_LT(i, max_size_); 1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (static_cast<uint>(i) >= max_size_) { 1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return false; 1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Unsigned comparison avoids checking sparse_to_dense_[i] < 0. 1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return (uint)sparse_to_dense_[i] < (uint)size_ && 1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dense_[sparse_to_dense_[i]] == i; 1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Adds i to the set. 1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void insert(int i) { 1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (!contains(i)) 1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson insert_new(i); 1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Set the value at the new index i to v. 1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Fast but unsafe: only use if contains(i) is false. 1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson void insert_new(int i) { 1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson if (static_cast<uint>(i) >= max_size_) { 1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Semantically, end() would be better here, but we already know 1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // the user did something stupid, so begin() insulates them from 1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // dereferencing an invalid pointer. 1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson return; 1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK(!contains(i)); 1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DCHECK_LT(size_, max_size_); 1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson sparse_to_dense_[i] = size_; 1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson dense_[size_] = i; 1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson size_++; 1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson } 1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Comparison function for sorting. 1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // Can sort the sparse array so that future iterations 1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // will visit indices in increasing order using 1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson // sort(arr.begin(), arr.end(), arr.less); 1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson static bool less(int a, int b) { return a < b; } 1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private: 1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int size_; 1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int max_size_; 1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* sparse_to_dense_; 1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson int* dense_; 1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin bool valgrind_; 1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson DISALLOW_EVIL_CONSTRUCTORS(SparseSet); 1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}; 1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson} // namespace re2 1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson 1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#endif // RE2_UTIL_SPARSE_SET_H__ 180