12ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Copyright 2006 The RE2 Authors.  All Rights Reserved.
22ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Use of this source code is governed by a BSD-style
32ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// license that can be found in the LICENSE file.
42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
52ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// DESCRIPTION
62ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
72ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// SparseSet<T>(m) is a set of integers in [0, m).
82ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// It requires sizeof(int)*m memory, but it provides
92ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// fast iteration through the elements in the set and fast clearing
102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// of the set.
112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Insertion and deletion are constant time operations.
132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Allocating the set is a constant time operation
152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// when memory allocation is a constant time operation.
162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Clearing the set is a constant time operation (unusual!).
182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Iterating through the set is an O(n) operation, where n
202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// is the number of items in the set (not O(m)).
212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The set iterator visits entries in the order they were first
232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// inserted into the array.  It is safe to add items to the set while
242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// using an iterator: the iterator will visit indices added to the set
252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// during the iteration, but will not re-visit indices whose values
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// change after visiting.  Thus SparseSet can be a convenient
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// implementation of a work queue.
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The SparseSet implementation is NOT thread-safe.  It is up to the
302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// caller to make sure only one thread is accessing the set.  (Typically
312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// these sets are temporary values and used in situations where speed is
322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// important.)
332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// The SparseSet interface does not present all the usual STL bells and
352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// whistles.
362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Implemented with reference to Briggs & Torczon, An Efficient
382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// Representation for Sparse Sets, ACM Letters on Programming Languages
392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// and Systems, Volume 2, Issue 1-4 (March-Dec.  1993), pp.  59-69.
402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// For a generalization to sparse array, see sparse_array.h.
422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// IMPLEMENTATION
442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson//
452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// See sparse_array.h for implementation details
462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#ifndef RE2_UTIL_SPARSE_SET_H__
482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define RE2_UTIL_SPARSE_SET_H__
492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#include "util/util.h"
512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 {
532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonclass SparseSet {
552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson public:
562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  SparseSet()
570d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    : size_(0), max_size_(0), sparse_to_dense_(NULL), dense_(NULL), valgrind_(RunningOnValgrind()) {}
582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  SparseSet(int max_size) {
602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    max_size_ = max_size;
612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    sparse_to_dense_ = new int[max_size];
622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    dense_ = new int[max_size];
630d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    valgrind_ = RunningOnValgrind();
642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // Don't need to zero the memory, but do so anyway
652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // to appease Valgrind.
660d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin    if (valgrind_) {
672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      for (int i = 0; i < max_size; i++) {
682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        dense_[i] = 0xababababU;
692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        sparse_to_dense_[i] = 0xababababU;
702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    size_ = 0;
732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  ~SparseSet() {
762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete[] sparse_to_dense_;
772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    delete[] dense_;
782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  typedef int* iterator;
812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  typedef const int* const_iterator;
822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
832ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int size() const { return size_; }
842ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  iterator begin() { return dense_; }
852ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  iterator end() { return dense_ + size_; }
862ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  const_iterator begin() const { return dense_; }
872ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  const_iterator end() const { return dense_ + size_; }
882ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
892ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Change the maximum size of the array.
902ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Invalidates all iterators.
912ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  void resize(int new_max_size) {
922ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (size_ > new_max_size)
932ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      size_ = new_max_size;
942ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (new_max_size > max_size_) {
952ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      int* a = new int[new_max_size];
962ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (sparse_to_dense_) {
972ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        memmove(a, sparse_to_dense_, max_size_*sizeof a[0]);
980d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (valgrind_) {
992ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          for (int i = max_size_; i < new_max_size; i++)
1002ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson            a[i] = 0xababababU;
1012ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        }
1022ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        delete[] sparse_to_dense_;
1032ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
1042ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      sparse_to_dense_ = a;
1052ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1062ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      a = new int[new_max_size];
1072ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      if (dense_) {
1082ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        memmove(a, dense_, size_*sizeof a[0]);
1090d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin        if (valgrind_) {
1102ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson          for (int i = size_; i < new_max_size; i++)
1112ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson            a[i] = 0xababababU;
1122ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        }
1132ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson        delete[] dense_;
1142ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      }
1152ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      dense_ = a;
1162ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
1172ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    max_size_ = new_max_size;
1182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Return the maximum size of the array.
1212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Indices can be in the range [0, max_size).
1222ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int max_size() const { return max_size_; }
1232ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1242ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Clear the array.
1252ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  void clear() { size_ = 0; }
1262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Check whether i is in the array.
1282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  bool contains(int i) const {
1292ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    DCHECK_GE(i, 0);
1302ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    DCHECK_LT(i, max_size_);
1312ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (static_cast<uint>(i) >= max_size_) {
1322ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return false;
1332ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
1342ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    // Unsigned comparison avoids checking sparse_to_dense_[i] < 0.
1352ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    return (uint)sparse_to_dense_[i] < (uint)size_ &&
1362ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      dense_[sparse_to_dense_[i]] == i;
1372ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1382ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1392ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Adds i to the set.
1402ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  void insert(int i) {
1412ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (!contains(i))
1422ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      insert_new(i);
1432ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1442ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1452ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Set the value at the new index i to v.
1462ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Fast but unsafe: only use if contains(i) is false.
1472ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  void insert_new(int i) {
1482ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    if (static_cast<uint>(i) >= max_size_) {
1492ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // Semantically, end() would be better here, but we already know
1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // the user did something stupid, so begin() insulates them from
1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      // dereferencing an invalid pointer.
1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson      return;
1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    }
1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    DCHECK(!contains(i));
1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    DCHECK_LT(size_, max_size_);
1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    sparse_to_dense_[i] = size_;
1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    dense_[size_] = i;
1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson    size_++;
1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  }
1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Comparison function for sorting.
1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // Can sort the sparse array so that future iterations
1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // will visit indices in increasing order using
1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  // sort(arr.begin(), arr.end(), arr.less);
1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  static bool less(int a, int b) { return a < b; }
1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson private:
1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int size_;
1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int max_size_;
1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int* sparse_to_dense_;
1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  int* dense_;
1720d4c52358a1af421705c54bd8a9fdd8a30558a2eAlexander Gutkin  bool valgrind_;
1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  DISALLOW_EVIL_CONSTRUCTORS(SparseSet);
1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson};
1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}  // namespace re2
1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#endif  // RE2_UTIL_SPARSE_SET_H__
180