1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bi-table.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Classes for representing a bijective mapping between an arbitrary entry
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of type T and a signed integral ID.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_BI_TABLE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_BI_TABLE_H__
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <deque>
265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::deque;
275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <functional>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <tr1/unordered_set>
325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::tr1::unordered_set;
335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::tr1::unordered_multiset;
345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// BI TABLES - these determine a bijective mapping between an
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arbitrary entry of type T and an signed integral ID of type I. The IDs are
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// allocated starting from 0 in order.
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// template <class I, class T>
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// class BiTable {
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  public:
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Required constructors.
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   BiTable();
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
48dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   // Lookup integer ID from entry. If it doesn't exist and 'insert'
49dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   / is true, then add it. Otherwise return -1.
50dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   I FindId(const T &entry, bool insert = true);
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // Lookup entry from integer ID.
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   const T &FindEntry(I) const;
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   // # of stored entries.
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   I Size() const;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// };
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a hash map for the entry to ID mapping.
585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// H is the hash function and E is the equality function.
595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// If passed to the constructor, ownership is given to this class.
605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E = std::equal_to<T> >
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass HashBiTable {
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Reserves space for 'table_size' elements.
655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : hash_func_(h),
675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_equal_(e),
685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (table_size)
705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      id2entry_.reserve(table_size);
715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  HashBiTable(const HashBiTable<I, T, H, E> &table)
745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  table.entry2id_.size(),
785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  (hash_func_ ? *hash_func_ : H()),
795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                  (hash_equal_ ? *hash_equal_ : E())),
805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        id2entry_(table.id2entry_) { }
815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ~HashBiTable() {
835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete hash_func_;
845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete hash_equal_;
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  I FindId(const T &entry, bool insert = true) {
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I &id_ref = entry2id_[entry];
89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (id_ref == 0) {  // T not found
90dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (insert) {     // store and assign it a new ID
91dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id2entry_.push_back(entry);
92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id_ref = id2entry_.size();
93dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      } else {
94dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return -1;
95dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return id_ref - 1;  // NB: id_ref = ID + 1
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &FindEntry(I s) const {
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return id2entry_[s];
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I Size() const { return id2entry_.size(); }
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
1075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  H *hash_func_;
1085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  E *hash_equal_;
1095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  unordered_map<T, I, H, E> entry2id_;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> id2entry_;
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
1135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin};
1145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Enables alternative hash set representations below.
1175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
1185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintypedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
1195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Default hash set is STL hash_set
1215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate<class K, class H, class E, HSType>
1225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinstruct  HashSet : public unordered_set<K, H, E> {
1235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  HashSet(size_t n = 0, const H &h = H(), const E &e = E())
1245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : unordered_set<K, H, E>(n, h, e)  { }
1255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void rehash(size_t n) { }
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// An implementation using a hash set for the entry to ID mapping.
1315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// The hash set holds 'keys' which are either the ID or kCurrentKey.
1325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// These keys can be mapped to entrys either by looking up in the
1335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// entry vector or, if kCurrentKey, in current_entry_ member. The hash
1345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// and key equality functions map to entries first. H
1355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// is the hash function and E is the equality function. If passed to
1365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// the constructor, ownership is given to this class.
1375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H,
1385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin          class E = std::equal_to<T>, HSType HS = HS_DENSE>
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CompactHashBiTable {
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class HashFunc;
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class HashEqual;
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Reserves space for 'table_size' elements.
1455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
1465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : hash_func_(h),
1475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_equal_(e),
1485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        compact_hash_func_(*this),
1495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        compact_hash_equal_(*this),
1505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        keys_(table_size, compact_hash_func_, compact_hash_equal_) {
1515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (table_size)
1525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      id2entry_.reserve(table_size);
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
1565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
1575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
1585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        compact_hash_func_(*this),
1595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        compact_hash_equal_(*this),
1605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
1615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        id2entry_(table.id2entry_) {
1625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    keys_.insert(table.keys_.begin(), table.keys_.end());
1635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin }
1645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
1655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  ~CompactHashBiTable() {
1665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete hash_func_;
1675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    delete hash_equal_;
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
170dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  I FindId(const T &entry, bool insert = true) {
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    current_entry_ = &entry;
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
173dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (it == keys_.end()) {  // T not found
174dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (insert) {           // store and assign it a new ID
175dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        I key = id2entry_.size();
176dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id2entry_.push_back(entry);
177dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        keys_.insert(key);
178dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return key;
179dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      } else {
180dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return -1;
181dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return *it;
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &FindEntry(I s) const { return id2entry_[s]; }
1885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I Size() const { return id2entry_.size(); }
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
1915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Clear content. With argument, erases last n IDs.
1925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void Clear(ssize_t n = -1) {
1935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (n < 0 || n > id2entry_.size())
1945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      n = id2entry_.size();
1955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    while (n-- > 0) {
1965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      I key = id2entry_.size() - 1;
1975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      keys_.erase(key);
1985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      id2entry_.pop_back();
1995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
2005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    keys_.rehash(0);
2015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
2045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static const I kCurrentKey;    // -1
2055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static const I kEmptyKey;      // -2
2065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static const I kDeletedKey;    // -3
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class HashFunc {
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    size_t operator()(I k) const {
2135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (k >= kCurrentKey) {
2145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return (*ht_->hash_func_)(ht_->Key2Entry(k));
2155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else {
2165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return 0;
2175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
2185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
2195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   private:
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const CompactHashBiTable *ht_;
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class HashEqual {
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator()(I k1, I k2) const {
2295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
2305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
2315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else {
2325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return k1 == k2;
2335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   private:
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const CompactHashBiTable *ht_;
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  const T &Key2Entry(I k) const {
2425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (k == kCurrentKey)
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return *current_entry_;
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return id2entry_[k];
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  H *hash_func_;
2495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  E *hash_equal_;
2505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  HashFunc compact_hash_func_;
2515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  HashEqual compact_hash_equal_;
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  KeyHashSet keys_;
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> id2entry_;
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T *current_entry_;
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
2605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS>
2615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
2625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS>
2645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
2655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS>
2675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a vector for the entry to ID mapping.
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// It is passed a function object FP that should fingerprint entries
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// uniquely to an integer that can used as a vector index. Normally,
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// VectorBiTable constructs the FP object.  The user can instead
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pass in this object; in that case, VectorBiTable takes its
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ownership.
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T, class FP>
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorBiTable {
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
2795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // Reserves space for 'table_size' elements.
2805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
2815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : fp_(fp ? fp : new FP()) {
2825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (table_size)
2835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      id2entry_.reserve(table_size);
2845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  }
2855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
2865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  VectorBiTable(const VectorBiTable<I, T, FP> &table)
2875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : fp_(table.fp_ ? new FP(*table.fp_) : 0),
2885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        fp2id_(table.fp2id_),
2895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        id2entry_(table.id2entry_) { }
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~VectorBiTable() { delete fp_; }
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
293dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  I FindId(const T &entry, bool insert = true) {
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ssize_t fp = (*fp_)(entry);
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (fp >= fp2id_.size())
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fp2id_.resize(fp + 1);
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I &id_ref = fp2id_[fp];
298dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (id_ref == 0) {  // T not found
299dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (insert) {     // store and assign it a new ID
300dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id2entry_.push_back(entry);
301dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id_ref = id2entry_.size();
302dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      } else {
303dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return -1;
304dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return id_ref - 1;  // NB: id_ref = ID + 1
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &FindEntry(I s) const { return id2entry_[s]; }
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I Size() const { return id2entry_.size(); }
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const FP &Fingerprint() const { return *fp_; }
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FP *fp_;
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<I> fp2id_;
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> id2entry_;
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a vector and a compact hash table. The
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// selecting functor S returns true for entries to be hashed in the
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// vector.  The fingerprinting functor FP returns a unique fingerprint
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for each entry to be hashed in the vector (these need to be
3285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// suitable for indexing in a vector).  The hash functor H is used
3295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// when hashing entry into the compact hash table.  If passed to the
3305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// constructor, ownership is given to this class.
3315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorHashBiTable {
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class HashFunc;
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  friend class HashEqual;
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
3385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                             size_t vector_size = 0,
3395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                             size_t entry_size = 0)
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : selector_(s),
3415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        fp_(fp ? fp : new FP()),
3425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        h_(h ? h : new H()),
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        hash_func_(*this),
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        hash_equal_(*this),
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        keys_(0, hash_func_, hash_equal_) {
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (vector_size)
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fp2id_.reserve(vector_size);
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (entry_size)
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      id2entry_.reserve(entry_size);
3505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin }
3515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin
3525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
3535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      : selector_(new S(table.s_)),
3545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        fp_(table.fp_ ? new FP(*table.fp_) : 0),
3555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        h_(table.h_ ? new H(*table.h_) : 0),
3565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        id2entry_(table.id2entry_),
3575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        fp2id_(table.fp2id_),
3585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_func_(*this),
3595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        hash_equal_(*this),
3605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        keys_(table.keys_.size(), hash_func_, hash_equal_) {
3615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin     keys_.insert(table.keys_.begin(), table.keys_.end());
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ~VectorHashBiTable() {
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete selector_;
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete fp_;
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    delete h_;
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
370dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  I FindId(const T &entry, bool insert = true) {
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      uint64 fp = (*fp_)(entry);
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (fp2id_.size() <= fp)
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        fp2id_.resize(fp + 1, 0);
375dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (fp2id_[fp] == 0) {         // T not found
376dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        if (insert) {                // store and assign it a new ID
377dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          id2entry_.push_back(entry);
378dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          fp2id_[fp] = id2entry_.size();
379dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        } else {
380dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          return -1;
381dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        }
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {  // Use the hash table otherwise.
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      current_entry_ = &entry;
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (it == keys_.end()) {
388dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        if (insert) {
389dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          I key = id2entry_.size();
390dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          id2entry_.push_back(entry);
391dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          keys_.insert(key);
392dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          return key;
393dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        } else {
394dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          return -1;
395dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        }
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return *it;
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &FindEntry(I s) const {
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return id2entry_[s];
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I Size() const { return id2entry_.size(); }
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const S &Selector() const { return *selector_; }
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const FP &Fingerprint() const { return *fp_; }
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const H &Hash() const { return *h_; }
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
4155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static const I kCurrentKey;  // -1
4165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  static const I kEmptyKey;    // -2
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class HashFunc {
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    size_t operator()(I k) const {
4235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (k >= kCurrentKey) {
4245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return (*(ht_->h_))(ht_->Key2Entry(k));
4255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else {
4265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return 0;
4275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
4285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    }
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   private:
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const VectorHashBiTable *ht_;
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  class HashEqual {
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   public:
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool operator()(I k1, I k2) const {
4385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
4395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
4405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      } else {
4415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin        return k1 == k2;
4425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin      }
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson   private:
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const VectorHashBiTable *ht_;
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &Key2Entry(I k) const {
4515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (k == kCurrentKey)
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return *current_entry_;
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    else
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return id2entry_[k];
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  S *selector_;  // Returns true if entry hashed into vector
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FP *fp_;       // Fingerprint used when hashing entry into vector
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  H *h_;         // Hash function used when hashing entry into hash_set
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<T> id2entry_;  // Maps state IDs to entry
4625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  vector<I> fp2id_;     // Maps entry fingerprints to IDs
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compact implementation of the hash table mapping entrys to
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // state IDs using the hash function 'h_'
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  HashFunc hash_func_;
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  HashEqual hash_equal_;
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  KeyHashSet keys_;
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T *current_entry_;
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // disallow
4725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS>
4765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
4785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS>
4795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a hash map for the entry to ID
4835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// mapping. This version permits erasing of arbitrary states.  The
4845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// entry T must have == defined and its default constructor must
4855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// produce a entry that will never be seen. F is the hash function.
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T, class F>
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ErasableBiTable {
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ErasableBiTable() : first_(0) {}
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
491dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  I FindId(const T &entry, bool insert = true) {
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    I &id_ref = entry2id_[entry];
493dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin    if (id_ref == 0) {  // T not found
494dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (insert) {     // store and assign it a new ID
495dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id2entry_.push_back(entry);
496dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        id_ref = id2entry_.size() + first_;
497dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      } else {
498dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return -1;
499dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return id_ref - 1;  // NB: id_ref = ID + 1
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T &FindEntry(I s) const { return id2entry_[s - first_]; }
505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I Size() const { return id2entry_.size(); }
507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Erase(I s) {
509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    T &entry = id2entry_[s - first_];
510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename unordered_map<T, I, F>::iterator it =
511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        entry2id_.find(entry);
512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    entry2id_.erase(it);
513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    id2entry_[s - first_] = empty_entry_;
514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      id2entry_.pop_front();
516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++first_;
517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<T, I, F> entry2id_;
522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  deque<T> id2entry_;
523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const T empty_entry_;
524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  I first_;        // I of first element in the deque;
525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
5265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  // disallow
5275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  void operator=(const ErasableBiTable<I, T, F> &table);  //disallow
528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_BI_TABLE_H__
533