bi-table.h revision dfd8b8327b93660601d016cdc6f29f433b45a8d8
1// bi-table.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Classes for representing a bijective mapping between an arbitrary entry 20// of type T and a signed integral ID. 21 22#ifndef FST_LIB_BI_TABLE_H__ 23#define FST_LIB_BI_TABLE_H__ 24 25#include <deque> 26#include <vector> 27using std::vector; 28 29namespace fst { 30 31// BI TABLES - these determine a bijective mapping between an 32// arbitrary entry of type T and an signed integral ID of type I. The IDs are 33// allocated starting from 0 in order. 34// 35// template <class I, class T> 36// class BiTable { 37// public: 38// 39// // Required constructors. 40// BiTable(); 41// 42// // Lookup integer ID from entry. If it doesn't exist and 'insert' 43// / is true, then add it. Otherwise return -1. 44// I FindId(const T &entry, bool insert = true); 45// // Lookup entry from integer ID. 46// const T &FindEntry(I) const; 47// // # of stored entries. 48// I Size() const; 49// }; 50 51// An implementation using a hash map for the entry to ID mapping. 52// The entry T must have == defined and the default constructor 53// must produce an entry that will never be seen. H is the hash function. 54template <class I, class T, class H> 55class HashBiTable { 56 public: 57 58 HashBiTable() { 59 T empty_entry; 60 } 61 62 I FindId(const T &entry, bool insert = true) { 63 I &id_ref = entry2id_[entry]; 64 if (id_ref == 0) { // T not found 65 if (insert) { // store and assign it a new ID 66 id2entry_.push_back(entry); 67 id_ref = id2entry_.size(); 68 } else { 69 return -1; 70 } 71 } 72 return id_ref - 1; // NB: id_ref = ID + 1 73 } 74 75 const T &FindEntry(I s) const { 76 return id2entry_[s]; 77 } 78 79 I Size() const { return id2entry_.size(); } 80 81 private: 82 unordered_map<T, I, H> entry2id_; 83 vector<T> id2entry_; 84 85 DISALLOW_COPY_AND_ASSIGN(HashBiTable); 86}; 87 88 89// An implementation using a hash set for the entry to ID 90// mapping. The hash set holds 'keys' which are either the ID 91// or kCurrentKey. These keys can be mapped to entrys either by 92// looking up in the entry vector or, if kCurrentKey, in current_entry_ 93// member. The hash and key equality functions map to entries first. 94// The entry T must have == defined and the default constructor 95// must produce a entry that will never be seen. H is the hash 96// function. 97template <class I, class T, class H> 98class CompactHashBiTable { 99 public: 100 friend class HashFunc; 101 friend class HashEqual; 102 103 CompactHashBiTable() 104 : hash_func_(*this), 105 hash_equal_(*this), 106 keys_(0, hash_func_, hash_equal_) { 107 } 108 109 // Reserves space for table_size elements. 110 explicit CompactHashBiTable(size_t table_size) 111 : hash_func_(*this), 112 hash_equal_(*this), 113 keys_(table_size, hash_func_, hash_equal_) { 114 id2entry_.reserve(table_size); 115 } 116 117 I FindId(const T &entry, bool insert = true) { 118 current_entry_ = &entry; 119 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 120 if (it == keys_.end()) { // T not found 121 if (insert) { // store and assign it a new ID 122 I key = id2entry_.size(); 123 id2entry_.push_back(entry); 124 keys_.insert(key); 125 return key; 126 } else { 127 return -1; 128 } 129 } else { 130 return *it; 131 } 132 } 133 134 const T &FindEntry(I s) const { return id2entry_[s]; } 135 I Size() const { return id2entry_.size(); } 136 137 private: 138 static const I kEmptyKey; // -1 139 static const I kCurrentKey; // -2 140 141 class HashFunc { 142 public: 143 HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 144 145 size_t operator()(I k) const { return hf(ht_->Key2T(k)); } 146 private: 147 const CompactHashBiTable *ht_; 148 H hf; 149 }; 150 151 class HashEqual { 152 public: 153 HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 154 155 bool operator()(I k1, I k2) const { 156 return ht_->Key2T(k1) == ht_->Key2T(k2); 157 } 158 private: 159 const CompactHashBiTable *ht_; 160 }; 161 162 typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet; 163 164 const T &Key2T(I k) const { 165 if (k == kEmptyKey) 166 return empty_entry_; 167 else if (k == kCurrentKey) 168 return *current_entry_; 169 else 170 return id2entry_[k]; 171 } 172 173 HashFunc hash_func_; 174 HashEqual hash_equal_; 175 KeyHashSet keys_; 176 vector<T> id2entry_; 177 const T empty_entry_; 178 const T *current_entry_; 179 180 DISALLOW_COPY_AND_ASSIGN(CompactHashBiTable); 181}; 182 183template <class I, class T, class H> 184const I CompactHashBiTable<I, T, H>::kEmptyKey = -1; 185 186template <class I, class T, class H> 187const I CompactHashBiTable<I, T, H>::kCurrentKey = -2; 188 189 190// An implementation using a vector for the entry to ID mapping. 191// It is passed a function object FP that should fingerprint entries 192// uniquely to an integer that can used as a vector index. Normally, 193// VectorBiTable constructs the FP object. The user can instead 194// pass in this object; in that case, VectorBiTable takes its 195// ownership. 196template <class I, class T, class FP> 197class VectorBiTable { 198 public: 199 explicit VectorBiTable(FP *fp = 0) : fp_(fp ? fp : new FP()) {} 200 201 ~VectorBiTable() { delete fp_; } 202 203 I FindId(const T &entry, bool insert = true) { 204 ssize_t fp = (*fp_)(entry); 205 if (fp >= fp2id_.size()) 206 fp2id_.resize(fp + 1); 207 I &id_ref = fp2id_[fp]; 208 if (id_ref == 0) { // T not found 209 if (insert) { // store and assign it a new ID 210 id2entry_.push_back(entry); 211 id_ref = id2entry_.size(); 212 } else { 213 return -1; 214 } 215 } 216 return id_ref - 1; // NB: id_ref = ID + 1 217 } 218 219 const T &FindEntry(I s) const { return id2entry_[s]; } 220 221 I Size() const { return id2entry_.size(); } 222 223 const FP &Fingerprint() const { return *fp_; } 224 225 private: 226 FP *fp_; 227 vector<I> fp2id_; 228 vector<T> id2entry_; 229 230 DISALLOW_COPY_AND_ASSIGN(VectorBiTable); 231}; 232 233 234// An implementation using a vector and a compact hash table. The 235// selecting functor S returns true for entries to be hashed in the 236// vector. The fingerprinting functor FP returns a unique fingerprint 237// for each entry to be hashed in the vector (these need to be 238// suitable for indexing in a vector). The hash functor H is used when 239// hashing entry into the compact hash table. 240template <class I, class T, class S, class FP, class H> 241class VectorHashBiTable { 242 public: 243 friend class HashFunc; 244 friend class HashEqual; 245 246 VectorHashBiTable(S *s, FP *fp, H *h, 247 size_t vector_size = 0, 248 size_t entry_size = 0) 249 : selector_(s), 250 fp_(fp), 251 h_(h), 252 hash_func_(*this), 253 hash_equal_(*this), 254 keys_(0, hash_func_, hash_equal_) { 255 if (vector_size) 256 fp2id_.reserve(vector_size); 257 if (entry_size) 258 id2entry_.reserve(entry_size); 259 } 260 261 ~VectorHashBiTable() { 262 delete selector_; 263 delete fp_; 264 delete h_; 265 } 266 267 I FindId(const T &entry, bool insert = true) { 268 if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 269 uint64 fp = (*fp_)(entry); 270 if (fp2id_.size() <= fp) 271 fp2id_.resize(fp + 1, 0); 272 if (fp2id_[fp] == 0) { // T not found 273 if (insert) { // store and assign it a new ID 274 id2entry_.push_back(entry); 275 fp2id_[fp] = id2entry_.size(); 276 } else { 277 return -1; 278 } 279 } 280 return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 281 } else { // Use the hash table otherwise. 282 current_entry_ = &entry; 283 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 284 if (it == keys_.end()) { 285 if (insert) { 286 I key = id2entry_.size(); 287 id2entry_.push_back(entry); 288 keys_.insert(key); 289 return key; 290 } else { 291 return -1; 292 } 293 } else { 294 return *it; 295 } 296 } 297 } 298 299 const T &FindEntry(I s) const { 300 return id2entry_[s]; 301 } 302 303 I Size() const { return id2entry_.size(); } 304 305 const S &Selector() const { return *selector_; } 306 307 const FP &Fingerprint() const { return *fp_; } 308 309 const H &Hash() const { return *h_; } 310 311 private: 312 static const I kEmptyKey; 313 static const I kCurrentKey; 314 315 class HashFunc { 316 public: 317 HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 318 319 size_t operator()(I k) const { return (*(ht_->h_))(ht_->Key2Entry(k)); } 320 private: 321 const VectorHashBiTable *ht_; 322 }; 323 324 class HashEqual { 325 public: 326 HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 327 328 bool operator()(I k1, I k2) const { 329 return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 330 } 331 private: 332 const VectorHashBiTable *ht_; 333 }; 334 335 typedef unordered_set<I, HashFunc, HashEqual> KeyHashSet; 336 337 const T &Key2Entry(I k) const { 338 if (k == kEmptyKey) 339 return empty_entry_; 340 else if (k == kCurrentKey) 341 return *current_entry_; 342 else 343 return id2entry_[k]; 344 } 345 346 347 S *selector_; // Returns true if entry hashed into vector 348 FP *fp_; // Fingerprint used when hashing entry into vector 349 H *h_; // Hash function used when hashing entry into hash_set 350 351 vector<T> id2entry_; // Maps state IDs to entry 352 vector<I> fp2id_; // Maps entry fingerprints to IDs 353 354 // Compact implementation of the hash table mapping entrys to 355 // state IDs using the hash function 'h_' 356 HashFunc hash_func_; 357 HashEqual hash_equal_; 358 KeyHashSet keys_; 359 const T empty_entry_; 360 const T *current_entry_; 361 362 DISALLOW_COPY_AND_ASSIGN(VectorHashBiTable); 363}; 364 365template <class I, class T, class S, class FP, class H> 366const I VectorHashBiTable<I, T, S, FP, H>::kEmptyKey = -1; 367 368template <class I, class T, class S, class FP, class H> 369const I VectorHashBiTable<I, T, S, FP, H>::kCurrentKey = -2; 370 371 372// An implementation using a hash map for the entry to ID 373// mapping. This version permits erasing of s. The entry T 374// must have == defined and its default constructor must produce a 375// entry that will never be seen. F is the hash function. 376template <class I, class T, class F> 377class ErasableBiTable { 378 public: 379 ErasableBiTable() : first_(0) {} 380 381 I FindId(const T &entry, bool insert = true) { 382 I &id_ref = entry2id_[entry]; 383 if (id_ref == 0) { // T not found 384 if (insert) { // store and assign it a new ID 385 id2entry_.push_back(entry); 386 id_ref = id2entry_.size() + first_; 387 } else { 388 return -1; 389 } 390 } 391 return id_ref - 1; // NB: id_ref = ID + 1 392 } 393 394 const T &FindEntry(I s) const { return id2entry_[s - first_]; } 395 396 I Size() const { return id2entry_.size(); } 397 398 void Erase(I s) { 399 T &entry = id2entry_[s - first_]; 400 typename unordered_map<T, I, F>::iterator it = 401 entry2id_.find(entry); 402 entry2id_.erase(it); 403 id2entry_[s - first_] = empty_entry_; 404 while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 405 id2entry_.pop_front(); 406 ++first_; 407 } 408 } 409 410 private: 411 unordered_map<T, I, F> entry2id_; 412 deque<T> id2entry_; 413 const T empty_entry_; 414 I first_; // I of first element in the deque; 415 416 DISALLOW_COPY_AND_ASSIGN(ErasableBiTable); 417}; 418 419} // namespace fst 420 421#endif // FST_LIB_BI_TABLE_H__ 422