1// bi-table.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Classes for representing a bijective mapping between an arbitrary entry
20// of type T and a signed integral ID.
21
22#ifndef FST_LIB_BI_TABLE_H__
23#define FST_LIB_BI_TABLE_H__
24
25#include <deque>
26using std::deque;
27#include <functional>
28#include <vector>
29using std::vector;
30
31#include <tr1/unordered_set>
32using std::tr1::unordered_set;
33using std::tr1::unordered_multiset;
34
35namespace fst {
36
37// BI TABLES - these determine a bijective mapping between an
38// arbitrary entry of type T and an signed integral ID of type I. The IDs are
39// allocated starting from 0 in order.
40//
41// template <class I, class T>
42// class BiTable {
43//  public:
44//
45//   // Required constructors.
46//   BiTable();
47//
48//   // Lookup integer ID from entry. If it doesn't exist and 'insert'
49//   / is true, then add it. Otherwise return -1.
50//   I FindId(const T &entry, bool insert = true);
51//   // Lookup entry from integer ID.
52//   const T &FindEntry(I) const;
53//   // # of stored entries.
54//   I Size() const;
55// };
56
57// An implementation using a hash map for the entry to ID mapping.
58// H is the hash function and E is the equality function.
59// If passed to the constructor, ownership is given to this class.
60
61template <class I, class T, class H, class E = std::equal_to<T> >
62class HashBiTable {
63 public:
64  // Reserves space for 'table_size' elements.
65  explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
66      : hash_func_(h),
67        hash_equal_(e),
68        entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
69    if (table_size)
70      id2entry_.reserve(table_size);
71  }
72
73  HashBiTable(const HashBiTable<I, T, H, E> &table)
74      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
75        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
76        entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
77                  table.entry2id_.size(),
78                  (hash_func_ ? *hash_func_ : H()),
79                  (hash_equal_ ? *hash_equal_ : E())),
80        id2entry_(table.id2entry_) { }
81
82  ~HashBiTable() {
83    delete hash_func_;
84    delete hash_equal_;
85  }
86
87  I FindId(const T &entry, bool insert = true) {
88    I &id_ref = entry2id_[entry];
89    if (id_ref == 0) {  // T not found
90      if (insert) {     // store and assign it a new ID
91        id2entry_.push_back(entry);
92        id_ref = id2entry_.size();
93      } else {
94        return -1;
95      }
96    }
97    return id_ref - 1;  // NB: id_ref = ID + 1
98  }
99
100  const T &FindEntry(I s) const {
101    return id2entry_[s];
102  }
103
104  I Size() const { return id2entry_.size(); }
105
106 private:
107  H *hash_func_;
108  E *hash_equal_;
109  unordered_map<T, I, H, E> entry2id_;
110  vector<T> id2entry_;
111
112  void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
113};
114
115
116// Enables alternative hash set representations below.
117// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
118typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
119
120// Default hash set is STL hash_set
121template<class K, class H, class E, HSType>
122struct  HashSet : public unordered_set<K, H, E> {
123  HashSet(size_t n = 0, const H &h = H(), const E &e = E())
124      : unordered_set<K, H, E>(n, h, e)  { }
125
126  void rehash(size_t n) { }
127};
128
129
130// An implementation using a hash set for the entry to ID mapping.
131// The hash set holds 'keys' which are either the ID or kCurrentKey.
132// These keys can be mapped to entrys either by looking up in the
133// entry vector or, if kCurrentKey, in current_entry_ member. The hash
134// and key equality functions map to entries first. H
135// is the hash function and E is the equality function. If passed to
136// the constructor, ownership is given to this class.
137template <class I, class T, class H,
138          class E = std::equal_to<T>, HSType HS = HS_DENSE>
139class CompactHashBiTable {
140 public:
141  friend class HashFunc;
142  friend class HashEqual;
143
144  // Reserves space for 'table_size' elements.
145  explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
146      : hash_func_(h),
147        hash_equal_(e),
148        compact_hash_func_(*this),
149        compact_hash_equal_(*this),
150        keys_(table_size, compact_hash_func_, compact_hash_equal_) {
151    if (table_size)
152      id2entry_.reserve(table_size);
153  }
154
155  CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
156      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
157        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
158        compact_hash_func_(*this),
159        compact_hash_equal_(*this),
160        keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
161        id2entry_(table.id2entry_) {
162    keys_.insert(table.keys_.begin(), table.keys_.end());
163 }
164
165  ~CompactHashBiTable() {
166    delete hash_func_;
167    delete hash_equal_;
168  }
169
170  I FindId(const T &entry, bool insert = true) {
171    current_entry_ = &entry;
172    typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
173    if (it == keys_.end()) {  // T not found
174      if (insert) {           // store and assign it a new ID
175        I key = id2entry_.size();
176        id2entry_.push_back(entry);
177        keys_.insert(key);
178        return key;
179      } else {
180        return -1;
181      }
182    } else {
183      return *it;
184    }
185  }
186
187  const T &FindEntry(I s) const { return id2entry_[s]; }
188
189  I Size() const { return id2entry_.size(); }
190
191  // Clear content. With argument, erases last n IDs.
192  void Clear(ssize_t n = -1) {
193    if (n < 0 || n > id2entry_.size())
194      n = id2entry_.size();
195    while (n-- > 0) {
196      I key = id2entry_.size() - 1;
197      keys_.erase(key);
198      id2entry_.pop_back();
199    }
200    keys_.rehash(0);
201  }
202
203 private:
204  static const I kCurrentKey;    // -1
205  static const I kEmptyKey;      // -2
206  static const I kDeletedKey;    // -3
207
208  class HashFunc {
209   public:
210    HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
211
212    size_t operator()(I k) const {
213      if (k >= kCurrentKey) {
214        return (*ht_->hash_func_)(ht_->Key2Entry(k));
215      } else {
216        return 0;
217      }
218    }
219
220   private:
221    const CompactHashBiTable *ht_;
222  };
223
224  class HashEqual {
225   public:
226    HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
227
228    bool operator()(I k1, I k2) const {
229      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
230        return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
231      } else {
232        return k1 == k2;
233      }
234    }
235   private:
236    const CompactHashBiTable *ht_;
237  };
238
239  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
240
241  const T &Key2Entry(I k) const {
242    if (k == kCurrentKey)
243      return *current_entry_;
244    else
245      return id2entry_[k];
246  }
247
248  H *hash_func_;
249  E *hash_equal_;
250  HashFunc compact_hash_func_;
251  HashEqual compact_hash_equal_;
252  KeyHashSet keys_;
253  vector<T> id2entry_;
254  const T *current_entry_;
255
256  void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
257};
258
259
260template <class I, class T, class H, class E, HSType HS>
261const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;
262
263template <class I, class T, class H, class E, HSType HS>
264const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;
265
266template <class I, class T, class H, class E, HSType HS>
267const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;
268
269
270// An implementation using a vector for the entry to ID mapping.
271// It is passed a function object FP that should fingerprint entries
272// uniquely to an integer that can used as a vector index. Normally,
273// VectorBiTable constructs the FP object.  The user can instead
274// pass in this object; in that case, VectorBiTable takes its
275// ownership.
276template <class I, class T, class FP>
277class VectorBiTable {
278 public:
279  // Reserves space for 'table_size' elements.
280  explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
281      : fp_(fp ? fp : new FP()) {
282    if (table_size)
283      id2entry_.reserve(table_size);
284  }
285
286  VectorBiTable(const VectorBiTable<I, T, FP> &table)
287      : fp_(table.fp_ ? new FP(*table.fp_) : 0),
288        fp2id_(table.fp2id_),
289        id2entry_(table.id2entry_) { }
290
291  ~VectorBiTable() { delete fp_; }
292
293  I FindId(const T &entry, bool insert = true) {
294    ssize_t fp = (*fp_)(entry);
295    if (fp >= fp2id_.size())
296      fp2id_.resize(fp + 1);
297    I &id_ref = fp2id_[fp];
298    if (id_ref == 0) {  // T not found
299      if (insert) {     // store and assign it a new ID
300        id2entry_.push_back(entry);
301        id_ref = id2entry_.size();
302      } else {
303        return -1;
304      }
305    }
306    return id_ref - 1;  // NB: id_ref = ID + 1
307  }
308
309  const T &FindEntry(I s) const { return id2entry_[s]; }
310
311  I Size() const { return id2entry_.size(); }
312
313  const FP &Fingerprint() const { return *fp_; }
314
315 private:
316  FP *fp_;
317  vector<I> fp2id_;
318  vector<T> id2entry_;
319
320  void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
321};
322
323
324// An implementation using a vector and a compact hash table. The
325// selecting functor S returns true for entries to be hashed in the
326// vector.  The fingerprinting functor FP returns a unique fingerprint
327// for each entry to be hashed in the vector (these need to be
328// suitable for indexing in a vector).  The hash functor H is used
329// when hashing entry into the compact hash table.  If passed to the
330// constructor, ownership is given to this class.
331template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
332class VectorHashBiTable {
333 public:
334  friend class HashFunc;
335  friend class HashEqual;
336
337  explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
338                             size_t vector_size = 0,
339                             size_t entry_size = 0)
340      : selector_(s),
341        fp_(fp ? fp : new FP()),
342        h_(h ? h : new H()),
343        hash_func_(*this),
344        hash_equal_(*this),
345        keys_(0, hash_func_, hash_equal_) {
346    if (vector_size)
347      fp2id_.reserve(vector_size);
348    if (entry_size)
349      id2entry_.reserve(entry_size);
350 }
351
352  VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
353      : selector_(new S(table.s_)),
354        fp_(table.fp_ ? new FP(*table.fp_) : 0),
355        h_(table.h_ ? new H(*table.h_) : 0),
356        id2entry_(table.id2entry_),
357        fp2id_(table.fp2id_),
358        hash_func_(*this),
359        hash_equal_(*this),
360        keys_(table.keys_.size(), hash_func_, hash_equal_) {
361     keys_.insert(table.keys_.begin(), table.keys_.end());
362  }
363
364  ~VectorHashBiTable() {
365    delete selector_;
366    delete fp_;
367    delete h_;
368  }
369
370  I FindId(const T &entry, bool insert = true) {
371    if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
372      uint64 fp = (*fp_)(entry);
373      if (fp2id_.size() <= fp)
374        fp2id_.resize(fp + 1, 0);
375      if (fp2id_[fp] == 0) {         // T not found
376        if (insert) {                // store and assign it a new ID
377          id2entry_.push_back(entry);
378          fp2id_[fp] = id2entry_.size();
379        } else {
380          return -1;
381        }
382      }
383      return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
384    } else {  // Use the hash table otherwise.
385      current_entry_ = &entry;
386      typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
387      if (it == keys_.end()) {
388        if (insert) {
389          I key = id2entry_.size();
390          id2entry_.push_back(entry);
391          keys_.insert(key);
392          return key;
393        } else {
394          return -1;
395        }
396      } else {
397        return *it;
398      }
399    }
400  }
401
402  const T &FindEntry(I s) const {
403    return id2entry_[s];
404  }
405
406  I Size() const { return id2entry_.size(); }
407
408  const S &Selector() const { return *selector_; }
409
410  const FP &Fingerprint() const { return *fp_; }
411
412  const H &Hash() const { return *h_; }
413
414 private:
415  static const I kCurrentKey;  // -1
416  static const I kEmptyKey;    // -2
417
418  class HashFunc {
419   public:
420    HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
421
422    size_t operator()(I k) const {
423      if (k >= kCurrentKey) {
424        return (*(ht_->h_))(ht_->Key2Entry(k));
425      } else {
426        return 0;
427      }
428    }
429   private:
430    const VectorHashBiTable *ht_;
431  };
432
433  class HashEqual {
434   public:
435    HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
436
437    bool operator()(I k1, I k2) const {
438      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
439        return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
440      } else {
441        return k1 == k2;
442      }
443    }
444   private:
445    const VectorHashBiTable *ht_;
446  };
447
448  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;
449
450  const T &Key2Entry(I k) const {
451    if (k == kCurrentKey)
452      return *current_entry_;
453    else
454      return id2entry_[k];
455  }
456
457  S *selector_;  // Returns true if entry hashed into vector
458  FP *fp_;       // Fingerprint used when hashing entry into vector
459  H *h_;         // Hash function used when hashing entry into hash_set
460
461  vector<T> id2entry_;  // Maps state IDs to entry
462  vector<I> fp2id_;     // Maps entry fingerprints to IDs
463
464  // Compact implementation of the hash table mapping entrys to
465  // state IDs using the hash function 'h_'
466  HashFunc hash_func_;
467  HashEqual hash_equal_;
468  KeyHashSet keys_;
469  const T *current_entry_;
470
471  // disallow
472  void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
473};
474
475template <class I, class T, class S, class FP, class H, HSType HS>
476const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;
477
478template <class I, class T, class S, class FP, class H, HSType HS>
479const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3;
480
481
482// An implementation using a hash map for the entry to ID
483// mapping. This version permits erasing of arbitrary states.  The
484// entry T must have == defined and its default constructor must
485// produce a entry that will never be seen. F is the hash function.
486template <class I, class T, class F>
487class ErasableBiTable {
488 public:
489  ErasableBiTable() : first_(0) {}
490
491  I FindId(const T &entry, bool insert = true) {
492    I &id_ref = entry2id_[entry];
493    if (id_ref == 0) {  // T not found
494      if (insert) {     // store and assign it a new ID
495        id2entry_.push_back(entry);
496        id_ref = id2entry_.size() + first_;
497      } else {
498        return -1;
499      }
500    }
501    return id_ref - 1;  // NB: id_ref = ID + 1
502  }
503
504  const T &FindEntry(I s) const { return id2entry_[s - first_]; }
505
506  I Size() const { return id2entry_.size(); }
507
508  void Erase(I s) {
509    T &entry = id2entry_[s - first_];
510    typename unordered_map<T, I, F>::iterator it =
511        entry2id_.find(entry);
512    entry2id_.erase(it);
513    id2entry_[s - first_] = empty_entry_;
514    while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
515      id2entry_.pop_front();
516      ++first_;
517    }
518  }
519
520 private:
521  unordered_map<T, I, F> entry2id_;
522  deque<T> id2entry_;
523  const T empty_entry_;
524  I first_;        // I of first element in the deque;
525
526  // disallow
527  void operator=(const ErasableBiTable<I, T, F> &table);  //disallow
528};
529
530}  // namespace fst
531
532#endif  // FST_LIB_BI_TABLE_H__
533