1// bi-table.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Classes for representing a bijective mapping between an arbitrary entry
20// of type T and a signed integral ID.
21
22#ifndef FST_LIB_BI_TABLE_H__
23#define FST_LIB_BI_TABLE_H__
24
25#include <deque>
26#include <vector>
27using std::vector;
28
29namespace fst {
30
31// BI TABLES - these determine a bijective mapping between an
32// arbitrary entry of type T and an signed integral ID of type I. The IDs are
33// allocated starting from 0 in order.
34//
35// template <class I, class T>
36// class BiTable {
37//  public:
38//
39//   // Required constructors.
40//   BiTable();
41//
42//   // Lookup integer ID from entry. If it doesn't exist, then add it.
43//   I FindId(const T &entry);
44//   // Lookup entry from integer ID.
45//   const T &FindEntry(I) const;
46//   // # of stored entries.
47//   I Size() const;
48// };
49
50// An implementation using a hash map for the entry to ID mapping.
51// The  entry T must have == defined and the default constructor
52// must produce an entry that will never be seen. H is the hash function.
53template <class I, class T, class H>
54class HashBiTable {
55 public:
56
57  HashBiTable() {
58    T empty_entry;
59  }
60
61  I FindId(const T &entry) {
62    I &id_ref = entry2id_[entry];
63    if (id_ref == 0) {  // T not found; store and assign it a new ID.
64      id2entry_.push_back(entry);
65      id_ref = id2entry_.size();
66    }
67    return id_ref - 1;  // NB: id_ref = ID + 1
68  }
69
70  const T &FindEntry(I s) const {
71    return id2entry_[s];
72  }
73
74  I Size() const { return id2entry_.size(); }
75
76 private:
77  unordered_map<T, I, H> entry2id_;
78  vector<T> id2entry_;
79
80  DISALLOW_COPY_AND_ASSIGN(HashBiTable);
81};
82
83
84// An implementation using a hash set for the entry to ID
85// mapping.  The hash set holds 'keys' which are either the ID
86// or kCurrentKey.  These keys can be mapped to entrys either by
87// looking up in the entry vector or, if kCurrentKey, in current_entry_
88// member. The hash and key equality functions map to entries first.
89// The  entry T must have == defined and the default constructor
90// must produce a entry that will never be seen. H is the hash
91// function.
92template <class I, class T, class H>
93class CompactHashBiTable {
94 public:
95  friend class HashFunc;
96  friend class HashEqual;
97
98  CompactHashBiTable()
99      : hash_func_(*this),
100        hash_equal_(*this),
101        keys_(0, hash_func_, hash_equal_) {
102  }
103
104  // Reserves space for table_size elements.
105  explicit CompactHashBiTable(size_t table_size)
106      : hash_func_(*this),
107        hash_equal_(*this),
108        keys_(table_size, hash_func_, hash_equal_) {
109    id2entry_.reserve(table_size);
110  }
111
112  I FindId(const T &entry) {
113    current_entry_ = &entry;
114    typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
115    if (it == keys_.end()) {
116      I key = id2entry_.size();
117      id2entry_.push_back(entry);
118      keys_.insert(key);
119      return key;
120    } else {
121      return *it;
122    }
123  }
124
125  const T &FindEntry(I s) const { return id2entry_[s]; }
126  I Size() const { return id2entry_.size(); }
127
128 private:
129  static const I kEmptyKey;    // -1
130  static const I kCurrentKey;  // -2
131
132  class HashFunc {
133   public:
134    HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
135
136    size_t operator()(I k) const { return hf(ht_->Key2T(k)); }
137   private:
138    const CompactHashBiTable *ht_;
139    H hf;
140  };
141
142  class HashEqual {
143   public:
144    HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
145
146    bool operator()(I k1, I k2) const {
147      return ht_->Key2T(k1) == ht_->Key2T(k2);
148    }
149   private:
150    const CompactHashBiTable *ht_;
151  };
152
153  typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
154
155  const T &Key2T(I k) const {
156    if (k == kEmptyKey)
157      return empty_entry_;
158    else if (k == kCurrentKey)
159      return *current_entry_;
160    else
161      return id2entry_[k];
162  }
163
164  HashFunc hash_func_;
165  HashEqual hash_equal_;
166  KeyHashSet keys_;
167  vector<T> id2entry_;
168  const T empty_entry_;
169  const T *current_entry_;
170
171  DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable);
172};
173
174template <class I, class T, class H>
175const I CompactHashBiTable<I, T, H>::kEmptyKey = -1;
176
177template <class I, class T, class H>
178const I CompactHashBiTable<I, T, H>::kCurrentKey = -2;
179
180
181// An implementation using a vector for the entry to ID mapping.
182// It is passed a function object FP that should fingerprint entries
183// uniquely to an integer that can used as a vector index. Normally,
184// VectorBiTable constructs the FP object.  The user can instead
185// pass in this object; in that case, VectorBiTable takes its
186// ownership.
187template <class I, class T, class FP>
188class VectorBiTable {
189 public:
190  explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {}
191
192  ~VectorBiTable() { delete fp_; }
193
194  I FindId(const T &entry) {
195    ssize_t fp = (*fp_)(entry);
196    if (fp >= fp2id_.size())
197      fp2id_.resize(fp + 1);
198    I &id_ref = fp2id_[fp];
199    if (id_ref == 0) {  // T not found; store and assign it a new ID.
200      id2entry_.push_back(entry);
201      id_ref = id2entry_.size();
202    }
203    return id_ref - 1;  // NB: id_ref = ID + 1
204  }
205
206  const T &FindEntry(I s) const { return id2entry_[s]; }
207
208  I Size() const { return id2entry_.size(); }
209
210  const FP &Fingerprint() const { return *fp_; }
211
212 private:
213  FP *fp_;
214  vector<I> fp2id_;
215  vector<T> id2entry_;
216
217  DISALLOW_COPY_AND_ASSIGN(VectorBiTable);
218};
219
220
221// An implementation using a vector and a compact hash table. The
222// selecting functor S returns true for entries to be hashed in the
223// vector.  The fingerprinting functor FP returns a unique fingerprint
224// for each entry to be hashed in the vector (these need to be
225// suitable for indexing in a vector).  The hash functor H is used when
226// hashing entry into the compact hash table.
227template <class I, class T, class S, class FP, class H>
228class VectorHashBiTable {
229 public:
230  friend class HashFunc;
231  friend class HashEqual;
232
233  VectorHashBiTable(S *s, FP *fp, H *h,
234                       size_t vector_size = 0,
235                       size_t entry_size = 0)
236      : selector_(s),
237        fp_(fp),
238        h_(h),
239        hash_func_(*this),
240        hash_equal_(*this),
241        keys_(0, hash_func_, hash_equal_) {
242    if (vector_size)
243      fp2id_.reserve(vector_size);
244    if (entry_size)
245      id2entry_.reserve(entry_size);
246  }
247
248  ~VectorHashBiTable() {
249    delete selector_;
250    delete fp_;
251    delete h_;
252  }
253
254  I FindId(const T &entry) {
255    if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
256      uint64 fp = (*fp_)(entry);
257      if (fp2id_.size() <= fp)
258        fp2id_.resize(fp + 1, 0);
259      if (fp2id_[fp] == 0) {
260        id2entry_.push_back(entry);
261        fp2id_[fp] = id2entry_.size();
262      }
263      return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
264    } else {  // Use the hash table otherwise.
265      current_entry_ = &entry;
266      typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
267      if (it == keys_.end()) {
268        I key = id2entry_.size();
269        id2entry_.push_back(entry);
270        keys_.insert(key);
271        return key;
272      } else {
273        return *it;
274      }
275    }
276  }
277
278  const T &FindEntry(I s) const {
279    return id2entry_[s];
280  }
281
282  I Size() const { return id2entry_.size(); }
283
284  const S &Selector() const { return *selector_; }
285
286  const FP &Fingerprint() const { return *fp_; }
287
288  const H &Hash() const { return *h_; }
289
290 private:
291  static const I kEmptyKey;
292  static const I kCurrentKey;
293
294  class HashFunc {
295   public:
296    HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
297
298    size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); }
299   private:
300    const VectorHashBiTable *ht_;
301  };
302
303  class HashEqual {
304   public:
305    HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
306
307    bool operator()(I k1, I k2) const {
308      return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
309    }
310   private:
311    const VectorHashBiTable *ht_;
312  };
313
314  typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet;
315
316  const T &Key2Entry(I k) const {
317    if (k == kEmptyKey)
318      return empty_entry_;
319    else if (k == kCurrentKey)
320      return *current_entry_;
321    else
322      return id2entry_[k];
323  }
324
325
326  S *selector_;  // Returns true if entry hashed into vector
327  FP *fp_;       // Fingerprint used when hashing entry into vector
328  H *h_;         // Hash function used when hashing entry into hash_set
329
330  vector<T> id2entry_;  // Maps state IDs to entry
331  vector<I> fp2id_;        // Maps entry fingerprints to IDs
332
333  // Compact implementation of the hash table mapping entrys to
334  // state IDs using the hash function 'h_'
335  HashFunc hash_func_;
336  HashEqual hash_equal_;
337  KeyHashSet keys_;
338  const T empty_entry_;
339  const T *current_entry_;
340
341  DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable);
342};
343
344template <class I, class T, class S, class FP, class H>
345const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1;
346
347template <class I, class T, class S, class FP, class H>
348const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2;
349
350
351// An implementation using a hash map for the entry to ID
352// mapping. This version permits erasing of s.  The  entry T
353// must have == defined and its default constructor must produce a
354// entry that will never be seen. F is the hash function.
355template <class I, class T, class F>
356class ErasableBiTable {
357 public:
358  ErasableBiTable() : first_(0) {}
359
360  I FindId(const T &entry) {
361    I &id_ref = entry2id_[entry];
362    if (id_ref == 0) {  // T not found; store and assign it a new ID.
363      id2entry_.push_back(entry);
364      id_ref = id2entry_.size() + first_;
365    }
366    return id_ref - 1;  // NB: id_ref = ID + 1
367  }
368
369  const T &FindEntry(I s) const { return id2entry_[s - first_]; }
370
371  I Size() const { return id2entry_.size(); }
372
373  void Erase(I s) {
374    T &entry = id2entry_[s - first_];
375    typename unordered_map<T, I, F>::iterator it =
376        entry2id_.find(entry);
377    entry2id_.erase(it);
378    id2entry_[s - first_] = empty_entry_;
379    while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
380      id2entry_.pop_front();
381      ++first_;
382    }
383  }
384
385 private:
386  unordered_map<T, I, F> entry2id_;
387  deque<T> id2entry_;
388  const T empty_entry_;
389  I first_;        // I of first element in the deque;
390
391  DISALLOW_COPY_AND_ASSIGN(ErasableBiTable);
392};
393
394}  // namespace fst
395
396#endif  // FST_LIB_BI_TABLE_H__
397