1// bi-table.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Classes for representing a bijective mapping between an arbitrary entry 20// of type T and a signed integral ID. 21 22#ifndef FST_LIB_BI_TABLE_H__ 23#define FST_LIB_BI_TABLE_H__ 24 25#include <deque> 26using std::deque; 27#include <functional> 28#include <vector> 29using std::vector; 30 31#include <tr1/unordered_set> 32using std::tr1::unordered_set; 33using std::tr1::unordered_multiset; 34 35namespace fst { 36 37// BI TABLES - these determine a bijective mapping between an 38// arbitrary entry of type T and an signed integral ID of type I. The IDs are 39// allocated starting from 0 in order. 40// 41// template <class I, class T> 42// class BiTable { 43// public: 44// 45// // Required constructors. 46// BiTable(); 47// 48// // Lookup integer ID from entry. If it doesn't exist and 'insert' 49// / is true, then add it. Otherwise return -1. 50// I FindId(const T &entry, bool insert = true); 51// // Lookup entry from integer ID. 52// const T &FindEntry(I) const; 53// // # of stored entries. 54// I Size() const; 55// }; 56 57// An implementation using a hash map for the entry to ID mapping. 58// H is the hash function and E is the equality function. 59// If passed to the constructor, ownership is given to this class. 60 61template <class I, class T, class H, class E = std::equal_to<T> > 62class HashBiTable { 63 public: 64 // Reserves space for 'table_size' elements. 65 explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 66 : hash_func_(h), 67 hash_equal_(e), 68 entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { 69 if (table_size) 70 id2entry_.reserve(table_size); 71 } 72 73 HashBiTable(const HashBiTable<I, T, H, E> &table) 74 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 75 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 76 entry2id_(table.entry2id_.begin(), table.entry2id_.end(), 77 table.entry2id_.size(), 78 (hash_func_ ? *hash_func_ : H()), 79 (hash_equal_ ? *hash_equal_ : E())), 80 id2entry_(table.id2entry_) { } 81 82 ~HashBiTable() { 83 delete hash_func_; 84 delete hash_equal_; 85 } 86 87 I FindId(const T &entry, bool insert = true) { 88 I &id_ref = entry2id_[entry]; 89 if (id_ref == 0) { // T not found 90 if (insert) { // store and assign it a new ID 91 id2entry_.push_back(entry); 92 id_ref = id2entry_.size(); 93 } else { 94 return -1; 95 } 96 } 97 return id_ref - 1; // NB: id_ref = ID + 1 98 } 99 100 const T &FindEntry(I s) const { 101 return id2entry_[s]; 102 } 103 104 I Size() const { return id2entry_.size(); } 105 106 private: 107 H *hash_func_; 108 E *hash_equal_; 109 unordered_map<T, I, H, E> entry2id_; 110 vector<T> id2entry_; 111 112 void operator=(const HashBiTable<I, T, H, E> &table); // disallow 113}; 114 115 116// Enables alternative hash set representations below. 117// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 118typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 119 120// Default hash set is STL hash_set 121template<class K, class H, class E, HSType> 122struct HashSet : public unordered_set<K, H, E> { 123 HashSet(size_t n = 0, const H &h = H(), const E &e = E()) 124 : unordered_set<K, H, E>(n, h, e) { } 125 126 void rehash(size_t n) { } 127}; 128 129 130// An implementation using a hash set for the entry to ID mapping. 131// The hash set holds 'keys' which are either the ID or kCurrentKey. 132// These keys can be mapped to entrys either by looking up in the 133// entry vector or, if kCurrentKey, in current_entry_ member. The hash 134// and key equality functions map to entries first. H 135// is the hash function and E is the equality function. If passed to 136// the constructor, ownership is given to this class. 137template <class I, class T, class H, 138 class E = std::equal_to<T>, HSType HS = HS_DENSE> 139class CompactHashBiTable { 140 public: 141 friend class HashFunc; 142 friend class HashEqual; 143 144 // Reserves space for 'table_size' elements. 145 explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 146 : hash_func_(h), 147 hash_equal_(e), 148 compact_hash_func_(*this), 149 compact_hash_equal_(*this), 150 keys_(table_size, compact_hash_func_, compact_hash_equal_) { 151 if (table_size) 152 id2entry_.reserve(table_size); 153 } 154 155 CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table) 156 : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 157 hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 158 compact_hash_func_(*this), 159 compact_hash_equal_(*this), 160 keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), 161 id2entry_(table.id2entry_) { 162 keys_.insert(table.keys_.begin(), table.keys_.end()); 163 } 164 165 ~CompactHashBiTable() { 166 delete hash_func_; 167 delete hash_equal_; 168 } 169 170 I FindId(const T &entry, bool insert = true) { 171 current_entry_ = &entry; 172 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 173 if (it == keys_.end()) { // T not found 174 if (insert) { // store and assign it a new ID 175 I key = id2entry_.size(); 176 id2entry_.push_back(entry); 177 keys_.insert(key); 178 return key; 179 } else { 180 return -1; 181 } 182 } else { 183 return *it; 184 } 185 } 186 187 const T &FindEntry(I s) const { return id2entry_[s]; } 188 189 I Size() const { return id2entry_.size(); } 190 191 // Clear content. With argument, erases last n IDs. 192 void Clear(ssize_t n = -1) { 193 if (n < 0 || n > id2entry_.size()) 194 n = id2entry_.size(); 195 while (n-- > 0) { 196 I key = id2entry_.size() - 1; 197 keys_.erase(key); 198 id2entry_.pop_back(); 199 } 200 keys_.rehash(0); 201 } 202 203 private: 204 static const I kCurrentKey; // -1 205 static const I kEmptyKey; // -2 206 static const I kDeletedKey; // -3 207 208 class HashFunc { 209 public: 210 HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 211 212 size_t operator()(I k) const { 213 if (k >= kCurrentKey) { 214 return (*ht_->hash_func_)(ht_->Key2Entry(k)); 215 } else { 216 return 0; 217 } 218 } 219 220 private: 221 const CompactHashBiTable *ht_; 222 }; 223 224 class HashEqual { 225 public: 226 HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 227 228 bool operator()(I k1, I k2) const { 229 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 230 return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); 231 } else { 232 return k1 == k2; 233 } 234 } 235 private: 236 const CompactHashBiTable *ht_; 237 }; 238 239 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 240 241 const T &Key2Entry(I k) const { 242 if (k == kCurrentKey) 243 return *current_entry_; 244 else 245 return id2entry_[k]; 246 } 247 248 H *hash_func_; 249 E *hash_equal_; 250 HashFunc compact_hash_func_; 251 HashEqual compact_hash_equal_; 252 KeyHashSet keys_; 253 vector<T> id2entry_; 254 const T *current_entry_; 255 256 void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow 257}; 258 259 260template <class I, class T, class H, class E, HSType HS> 261const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1; 262 263template <class I, class T, class H, class E, HSType HS> 264const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2; 265 266template <class I, class T, class H, class E, HSType HS> 267const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3; 268 269 270// An implementation using a vector for the entry to ID mapping. 271// It is passed a function object FP that should fingerprint entries 272// uniquely to an integer that can used as a vector index. Normally, 273// VectorBiTable constructs the FP object. The user can instead 274// pass in this object; in that case, VectorBiTable takes its 275// ownership. 276template <class I, class T, class FP> 277class VectorBiTable { 278 public: 279 // Reserves space for 'table_size' elements. 280 explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) 281 : fp_(fp ? fp : new FP()) { 282 if (table_size) 283 id2entry_.reserve(table_size); 284 } 285 286 VectorBiTable(const VectorBiTable<I, T, FP> &table) 287 : fp_(table.fp_ ? new FP(*table.fp_) : 0), 288 fp2id_(table.fp2id_), 289 id2entry_(table.id2entry_) { } 290 291 ~VectorBiTable() { delete fp_; } 292 293 I FindId(const T &entry, bool insert = true) { 294 ssize_t fp = (*fp_)(entry); 295 if (fp >= fp2id_.size()) 296 fp2id_.resize(fp + 1); 297 I &id_ref = fp2id_[fp]; 298 if (id_ref == 0) { // T not found 299 if (insert) { // store and assign it a new ID 300 id2entry_.push_back(entry); 301 id_ref = id2entry_.size(); 302 } else { 303 return -1; 304 } 305 } 306 return id_ref - 1; // NB: id_ref = ID + 1 307 } 308 309 const T &FindEntry(I s) const { return id2entry_[s]; } 310 311 I Size() const { return id2entry_.size(); } 312 313 const FP &Fingerprint() const { return *fp_; } 314 315 private: 316 FP *fp_; 317 vector<I> fp2id_; 318 vector<T> id2entry_; 319 320 void operator=(const VectorBiTable<I, T, FP> &table); // disallow 321}; 322 323 324// An implementation using a vector and a compact hash table. The 325// selecting functor S returns true for entries to be hashed in the 326// vector. The fingerprinting functor FP returns a unique fingerprint 327// for each entry to be hashed in the vector (these need to be 328// suitable for indexing in a vector). The hash functor H is used 329// when hashing entry into the compact hash table. If passed to the 330// constructor, ownership is given to this class. 331template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE> 332class VectorHashBiTable { 333 public: 334 friend class HashFunc; 335 friend class HashEqual; 336 337 explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, 338 size_t vector_size = 0, 339 size_t entry_size = 0) 340 : selector_(s), 341 fp_(fp ? fp : new FP()), 342 h_(h ? h : new H()), 343 hash_func_(*this), 344 hash_equal_(*this), 345 keys_(0, hash_func_, hash_equal_) { 346 if (vector_size) 347 fp2id_.reserve(vector_size); 348 if (entry_size) 349 id2entry_.reserve(entry_size); 350 } 351 352 VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table) 353 : selector_(new S(table.s_)), 354 fp_(table.fp_ ? new FP(*table.fp_) : 0), 355 h_(table.h_ ? new H(*table.h_) : 0), 356 id2entry_(table.id2entry_), 357 fp2id_(table.fp2id_), 358 hash_func_(*this), 359 hash_equal_(*this), 360 keys_(table.keys_.size(), hash_func_, hash_equal_) { 361 keys_.insert(table.keys_.begin(), table.keys_.end()); 362 } 363 364 ~VectorHashBiTable() { 365 delete selector_; 366 delete fp_; 367 delete h_; 368 } 369 370 I FindId(const T &entry, bool insert = true) { 371 if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 372 uint64 fp = (*fp_)(entry); 373 if (fp2id_.size() <= fp) 374 fp2id_.resize(fp + 1, 0); 375 if (fp2id_[fp] == 0) { // T not found 376 if (insert) { // store and assign it a new ID 377 id2entry_.push_back(entry); 378 fp2id_[fp] = id2entry_.size(); 379 } else { 380 return -1; 381 } 382 } 383 return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 384 } else { // Use the hash table otherwise. 385 current_entry_ = &entry; 386 typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 387 if (it == keys_.end()) { 388 if (insert) { 389 I key = id2entry_.size(); 390 id2entry_.push_back(entry); 391 keys_.insert(key); 392 return key; 393 } else { 394 return -1; 395 } 396 } else { 397 return *it; 398 } 399 } 400 } 401 402 const T &FindEntry(I s) const { 403 return id2entry_[s]; 404 } 405 406 I Size() const { return id2entry_.size(); } 407 408 const S &Selector() const { return *selector_; } 409 410 const FP &Fingerprint() const { return *fp_; } 411 412 const H &Hash() const { return *h_; } 413 414 private: 415 static const I kCurrentKey; // -1 416 static const I kEmptyKey; // -2 417 418 class HashFunc { 419 public: 420 HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 421 422 size_t operator()(I k) const { 423 if (k >= kCurrentKey) { 424 return (*(ht_->h_))(ht_->Key2Entry(k)); 425 } else { 426 return 0; 427 } 428 } 429 private: 430 const VectorHashBiTable *ht_; 431 }; 432 433 class HashEqual { 434 public: 435 HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 436 437 bool operator()(I k1, I k2) const { 438 if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 439 return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 440 } else { 441 return k1 == k2; 442 } 443 } 444 private: 445 const VectorHashBiTable *ht_; 446 }; 447 448 typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 449 450 const T &Key2Entry(I k) const { 451 if (k == kCurrentKey) 452 return *current_entry_; 453 else 454 return id2entry_[k]; 455 } 456 457 S *selector_; // Returns true if entry hashed into vector 458 FP *fp_; // Fingerprint used when hashing entry into vector 459 H *h_; // Hash function used when hashing entry into hash_set 460 461 vector<T> id2entry_; // Maps state IDs to entry 462 vector<I> fp2id_; // Maps entry fingerprints to IDs 463 464 // Compact implementation of the hash table mapping entrys to 465 // state IDs using the hash function 'h_' 466 HashFunc hash_func_; 467 HashEqual hash_equal_; 468 KeyHashSet keys_; 469 const T *current_entry_; 470 471 // disallow 472 void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table); 473}; 474 475template <class I, class T, class S, class FP, class H, HSType HS> 476const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1; 477 478template <class I, class T, class S, class FP, class H, HSType HS> 479const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3; 480 481 482// An implementation using a hash map for the entry to ID 483// mapping. This version permits erasing of arbitrary states. The 484// entry T must have == defined and its default constructor must 485// produce a entry that will never be seen. F is the hash function. 486template <class I, class T, class F> 487class ErasableBiTable { 488 public: 489 ErasableBiTable() : first_(0) {} 490 491 I FindId(const T &entry, bool insert = true) { 492 I &id_ref = entry2id_[entry]; 493 if (id_ref == 0) { // T not found 494 if (insert) { // store and assign it a new ID 495 id2entry_.push_back(entry); 496 id_ref = id2entry_.size() + first_; 497 } else { 498 return -1; 499 } 500 } 501 return id_ref - 1; // NB: id_ref = ID + 1 502 } 503 504 const T &FindEntry(I s) const { return id2entry_[s - first_]; } 505 506 I Size() const { return id2entry_.size(); } 507 508 void Erase(I s) { 509 T &entry = id2entry_[s - first_]; 510 typename unordered_map<T, I, F>::iterator it = 511 entry2id_.find(entry); 512 entry2id_.erase(it); 513 id2entry_[s - first_] = empty_entry_; 514 while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 515 id2entry_.pop_front(); 516 ++first_; 517 } 518 } 519 520 private: 521 unordered_map<T, I, F> entry2id_; 522 deque<T> id2entry_; 523 const T empty_entry_; 524 I first_; // I of first element in the deque; 525 526 // disallow 527 void operator=(const ErasableBiTable<I, T, F> &table); //disallow 528}; 529 530} // namespace fst 531 532#endif // FST_LIB_BI_TABLE_H__ 533