1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bi-table.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Classes for representing a bijective mapping between an arbitrary entry 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// of type T and a signed integral ID. 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_BI_TABLE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_BI_TABLE_H__ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <deque> 265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::deque; 275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <functional> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <tr1/unordered_set> 325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::tr1::unordered_set; 335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinusing std::tr1::unordered_multiset; 345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// BI TABLES - these determine a bijective mapping between an 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// arbitrary entry of type T and an signed integral ID of type I. The IDs are 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// allocated starting from 0 in order. 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// template <class I, class T> 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// class BiTable { 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// public: 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Required constructors. 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// BiTable(); 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 48dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// // Lookup integer ID from entry. If it doesn't exist and 'insert' 49dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// / is true, then add it. Otherwise return -1. 50dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// I FindId(const T &entry, bool insert = true); 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // Lookup entry from integer ID. 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// const T &FindEntry(I) const; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// // # of stored entries. 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// I Size() const; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// }; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a hash map for the entry to ID mapping. 585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// H is the hash function and E is the equality function. 595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// If passed to the constructor, ownership is given to this class. 605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E = std::equal_to<T> > 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass HashBiTable { 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Reserves space for 'table_size' elements. 655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : hash_func_(h), 675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_equal_(e), 685b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) { 695b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (table_size) 705b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_.reserve(table_size); 715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 735b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin HashBiTable(const HashBiTable<I, T, H, E> &table) 745b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin entry2id_(table.entry2id_.begin(), table.entry2id_.end(), 775b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin table.entry2id_.size(), 785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin (hash_func_ ? *hash_func_ : H()), 795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin (hash_equal_ ? *hash_equal_ : E())), 805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_(table.id2entry_) { } 815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ~HashBiTable() { 835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin delete hash_func_; 845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin delete hash_equal_; 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 87dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const T &entry, bool insert = true) { 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I &id_ref = entry2id_[entry]; 89dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (id_ref == 0) { // T not found 90dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { // store and assign it a new ID 91dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 92dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id_ref = id2entry_.size(); 93dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 94dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 95dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id_ref - 1; // NB: id_ref = ID + 1 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &FindEntry(I s) const { 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id2entry_[s]; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I Size() const { return id2entry_.size(); } 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 1075b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin H *hash_func_; 1085b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin E *hash_equal_; 1095b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin unordered_map<T, I, H, E> entry2id_; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<T> id2entry_; 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const HashBiTable<I, T, H, E> &table); // disallow 1135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin}; 1145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Enables alternative hash set representations below. 1175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 1185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintypedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; 1195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// Default hash set is STL hash_set 1215b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate<class K, class H, class E, HSType> 1225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinstruct HashSet : public unordered_set<K, H, E> { 1235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin HashSet(size_t n = 0, const H &h = H(), const E &e = E()) 1245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : unordered_set<K, H, E>(n, h, e) { } 1255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void rehash(size_t n) { } 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// An implementation using a hash set for the entry to ID mapping. 1315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// The hash set holds 'keys' which are either the ID or kCurrentKey. 1325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// These keys can be mapped to entrys either by looking up in the 1335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// entry vector or, if kCurrentKey, in current_entry_ member. The hash 1345b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// and key equality functions map to entries first. H 1355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// is the hash function and E is the equality function. If passed to 1365b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// the constructor, ownership is given to this class. 1375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, 1385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin class E = std::equal_to<T>, HSType HS = HS_DENSE> 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass CompactHashBiTable { 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class HashFunc; 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class HashEqual; 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1445b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Reserves space for 'table_size' elements. 1455b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) 1465b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : hash_func_(h), 1475b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_equal_(e), 1485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin compact_hash_func_(*this), 1495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin compact_hash_equal_(*this), 1505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_(table_size, compact_hash_func_, compact_hash_equal_) { 1515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (table_size) 1525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_.reserve(table_size); 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table) 1565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), 1575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0), 1585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin compact_hash_func_(*this), 1595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin compact_hash_equal_(*this), 1605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), 1615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_(table.id2entry_) { 1625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_.insert(table.keys_.begin(), table.keys_.end()); 1635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 1645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 1655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin ~CompactHashBiTable() { 1665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin delete hash_func_; 1675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin delete hash_equal_; 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 170dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const T &entry, bool insert = true) { 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_entry_ = &entry; 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 173dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (it == keys_.end()) { // T not found 174dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { // store and assign it a new ID 175dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I key = id2entry_.size(); 176dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 177dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin keys_.insert(key); 178dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return key; 179dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 180dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 181dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *it; 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &FindEntry(I s) const { return id2entry_[s]; } 1885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I Size() const { return id2entry_.size(); } 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 1915b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Clear content. With argument, erases last n IDs. 1925b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void Clear(ssize_t n = -1) { 1935b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (n < 0 || n > id2entry_.size()) 1945b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin n = id2entry_.size(); 1955b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin while (n-- > 0) { 1965b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin I key = id2entry_.size() - 1; 1975b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_.erase(key); 1985b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_.pop_back(); 1995b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2005b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_.rehash(0); 2015b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2025b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 2045b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static const I kCurrentKey; // -1 2055b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static const I kEmptyKey; // -2 2065b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static const I kDeletedKey; // -3 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class HashFunc { 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2125b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin size_t operator()(I k) const { 2135b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k >= kCurrentKey) { 2145b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return (*ht_->hash_func_)(ht_->Key2Entry(k)); 2155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 2165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return 0; 2175b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2185b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2195b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CompactHashBiTable *ht_; 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class HashEqual { 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(I k1, I k2) const { 2295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 2305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); 2315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 2325b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return k1 == k2; 2335b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CompactHashBiTable *ht_; 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin const T &Key2Entry(I k) const { 2425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k == kCurrentKey) 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *current_entry_; 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id2entry_[k]; 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin H *hash_func_; 2495b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin E *hash_equal_; 2505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin HashFunc compact_hash_func_; 2515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin HashEqual compact_hash_equal_; 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson KeyHashSet keys_; 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<T> id2entry_; 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T *current_entry_; 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS> 2615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1; 2625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS> 2645b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2; 2655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2665b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class H, class E, HSType HS> 2675b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3; 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a vector for the entry to ID mapping. 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// It is passed a function object FP that should fingerprint entries 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// uniquely to an integer that can used as a vector index. Normally, 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// VectorBiTable constructs the FP object. The user can instead 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pass in this object; in that case, VectorBiTable takes its 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ownership. 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T, class FP> 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorBiTable { 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 2795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // Reserves space for 'table_size' elements. 2805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) 2815b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : fp_(fp ? fp : new FP()) { 2825b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (table_size) 2835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_.reserve(table_size); 2845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 2855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 2865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin VectorBiTable(const VectorBiTable<I, T, FP> &table) 2875b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : fp_(table.fp_ ? new FP(*table.fp_) : 0), 2885b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin fp2id_(table.fp2id_), 2895b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_(table.id2entry_) { } 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~VectorBiTable() { delete fp_; } 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 293dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const T &entry, bool insert = true) { 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t fp = (*fp_)(entry); 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fp >= fp2id_.size()) 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fp2id_.resize(fp + 1); 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I &id_ref = fp2id_[fp]; 298dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (id_ref == 0) { // T not found 299dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { // store and assign it a new ID 300dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 301dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id_ref = id2entry_.size(); 302dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 303dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 304dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id_ref - 1; // NB: id_ref = ID + 1 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &FindEntry(I s) const { return id2entry_[s]; } 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I Size() const { return id2entry_.size(); } 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FP &Fingerprint() const { return *fp_; } 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FP *fp_; 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<I> fp2id_; 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<T> id2entry_; 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3205b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const VectorBiTable<I, T, FP> &table); // disallow 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a vector and a compact hash table. The 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// selecting functor S returns true for entries to be hashed in the 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// vector. The fingerprinting functor FP returns a unique fingerprint 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for each entry to be hashed in the vector (these need to be 3285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// suitable for indexing in a vector). The hash functor H is used 3295b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// when hashing entry into the compact hash table. If passed to the 3305b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// constructor, ownership is given to this class. 3315b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE> 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass VectorHashBiTable { 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class HashFunc; 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class HashEqual; 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3375b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, 3385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin size_t vector_size = 0, 3395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin size_t entry_size = 0) 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : selector_(s), 3415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin fp_(fp ? fp : new FP()), 3425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin h_(h ? h : new H()), 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hash_func_(*this), 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson hash_equal_(*this), 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keys_(0, hash_func_, hash_equal_) { 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (vector_size) 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fp2id_.reserve(vector_size); 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (entry_size) 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson id2entry_.reserve(entry_size); 3505b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 3515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin 3525b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table) 3535b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin : selector_(new S(table.s_)), 3545b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin fp_(table.fp_ ? new FP(*table.fp_) : 0), 3555b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin h_(table.h_ ? new H(*table.h_) : 0), 3565b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin id2entry_(table.id2entry_), 3575b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin fp2id_(table.fp2id_), 3585b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_func_(*this), 3595b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin hash_equal_(*this), 3605b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_(table.keys_.size(), hash_func_, hash_equal_) { 3615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin keys_.insert(table.keys_.begin(), table.keys_.end()); 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~VectorHashBiTable() { 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete selector_; 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete fp_; 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete h_; 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 370dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const T &entry, bool insert = true) { 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 fp = (*fp_)(entry); 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (fp2id_.size() <= fp) 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fp2id_.resize(fp + 1, 0); 375dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (fp2id_[fp] == 0) { // T not found 376dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { // store and assign it a new ID 377dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 378dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin fp2id_[fp] = id2entry_.size(); 379dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 380dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 381dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { // Use the hash table otherwise. 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_entry_ = &entry; 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (it == keys_.end()) { 388dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { 389dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I key = id2entry_.size(); 390dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 391dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin keys_.insert(key); 392dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return key; 393dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 394dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 395dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *it; 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &FindEntry(I s) const { 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id2entry_[s]; 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I Size() const { return id2entry_.size(); } 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const S &Selector() const { return *selector_; } 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const FP &Fingerprint() const { return *fp_; } 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const H &Hash() const { return *h_; } 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 4155b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static const I kCurrentKey; // -1 4165b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin static const I kEmptyKey; // -2 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class HashFunc { 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4225b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin size_t operator()(I k) const { 4235b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k >= kCurrentKey) { 4245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return (*(ht_->h_))(ht_->Key2Entry(k)); 4255b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 4265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return 0; 4275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 4285b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const VectorHashBiTable *ht_; 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class HashEqual { 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(I k1, I k2) const { 4385b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k1 >= kCurrentKey && k2 >= kCurrentKey) { 4395b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); 4405b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } else { 4415b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin return k1 == k2; 4425b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin } 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const VectorHashBiTable *ht_; 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4485b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &Key2Entry(I k) const { 4515b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (k == kCurrentKey) 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *current_entry_; 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id2entry_[k]; 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson S *selector_; // Returns true if entry hashed into vector 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FP *fp_; // Fingerprint used when hashing entry into vector 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson H *h_; // Hash function used when hashing entry into hash_set 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<T> id2entry_; // Maps state IDs to entry 4625b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin vector<I> fp2id_; // Maps entry fingerprints to IDs 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compact implementation of the hash table mapping entrys to 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // state IDs using the hash function 'h_' 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashFunc hash_func_; 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson HashEqual hash_equal_; 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson KeyHashSet keys_; 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T *current_entry_; 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // disallow 4725b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table); 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4755b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS> 4765b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1; 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 4785b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkintemplate <class I, class T, class S, class FP, class H, HSType HS> 4795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinconst I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3; 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// An implementation using a hash map for the entry to ID 4835b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// mapping. This version permits erasing of arbitrary states. The 4845b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// entry T must have == defined and its default constructor must 4855b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin// produce a entry that will never be seen. F is the hash function. 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class I, class T, class F> 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ErasableBiTable { 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ErasableBiTable() : first_(0) {} 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 491dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin I FindId(const T &entry, bool insert = true) { 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I &id_ref = entry2id_[entry]; 493dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (id_ref == 0) { // T not found 494dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin if (insert) { // store and assign it a new ID 495dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id2entry_.push_back(entry); 496dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin id_ref = id2entry_.size() + first_; 497dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } else { 498dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin return -1; 499dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin } 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return id_ref - 1; // NB: id_ref = ID + 1 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T &FindEntry(I s) const { return id2entry_[s - first_]; } 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I Size() const { return id2entry_.size(); } 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Erase(I s) { 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson T &entry = id2entry_[s - first_]; 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename unordered_map<T, I, F>::iterator it = 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson entry2id_.find(entry); 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson entry2id_.erase(it); 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson id2entry_[s - first_] = empty_entry_; 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson id2entry_.pop_front(); 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++first_; 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<T, I, F> entry2id_; 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson deque<T> id2entry_; 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const T empty_entry_; 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson I first_; // I of first element in the deque; 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 5265b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin // disallow 5275b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin void operator=(const ErasableBiTable<I, T, F> &table); //disallow 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_BI_TABLE_H__ 533