1// state-table.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Classes for representing the mapping between state tuples and state Ids. 20 21#ifndef FST_LIB_STATE_TABLE_H__ 22#define FST_LIB_STATE_TABLE_H__ 23 24#include <deque> 25using std::deque; 26#include <vector> 27using std::vector; 28 29#include <fst/bi-table.h> 30#include <fst/expanded-fst.h> 31 32 33namespace fst { 34 35// STATE TABLES - these determine the bijective mapping between state 36// tuples (e.g. in composition triples of two FST states and a 37// composition filter state) and their corresponding state IDs. 38// They are classes, templated on state tuples, of the form: 39// 40// template <class T> 41// class StateTable { 42// public: 43// typedef typename T StateTuple; 44// 45// // Required constructors. 46// StateTable(); 47// 48// // Lookup state ID by tuple. If it doesn't exist, then add it. 49// StateId FindState(const StateTuple &); 50// // Lookup state tuple by state ID. 51// const StateTuple<StateId> &Tuple(StateId) const; 52// // # of stored tuples. 53// StateId Size() const; 54// }; 55// 56// A state tuple has the form: 57// 58// template <class S> 59// struct StateTuple { 60// typedef typename S StateId; 61// 62// // Required constructors. 63// StateTuple(); 64// StateTuple(const StateTuple &); 65// }; 66 67 68// An implementation using a hash map for the tuple to state ID mapping. 69// The state tuple T must have == defined. H is the hash function. 70template <class T, class H> 71class HashStateTable : public HashBiTable<typename T::StateId, T, H> { 72 public: 73 typedef T StateTuple; 74 typedef typename StateTuple::StateId StateId; 75 using HashBiTable<StateId, T, H>::FindId; 76 using HashBiTable<StateId, T, H>::FindEntry; 77 using HashBiTable<StateId, T, H>::Size; 78 79 HashStateTable() : HashBiTable<StateId, T, H>() {} 80 81 // Reserves space for table_size elements. 82 explicit HashStateTable(size_t table_size) 83 : HashBiTable<StateId, T, H>(table_size) {} 84 85 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 86 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 87}; 88 89 90// An implementation using a hash map for the tuple to state ID mapping. 91// The state tuple T must have == defined. H is the hash function. 92template <class T, class H> 93class CompactHashStateTable 94 : public CompactHashBiTable<typename T::StateId, T, H> { 95 public: 96 typedef T StateTuple; 97 typedef typename StateTuple::StateId StateId; 98 using CompactHashBiTable<StateId, T, H>::FindId; 99 using CompactHashBiTable<StateId, T, H>::FindEntry; 100 using CompactHashBiTable<StateId, T, H>::Size; 101 102 CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} 103 104 // Reserves space for 'table_size' elements. 105 explicit CompactHashStateTable(size_t table_size) 106 : CompactHashBiTable<StateId, T, H>(table_size) {} 107 108 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 109 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 110}; 111 112// An implementation using a vector for the tuple to state mapping. 113// It is passed a function object FP that should fingerprint tuples 114// uniquely to an integer that can used as a vector index. Normally, 115// VectorStateTable constructs the FP object. The user can instead 116// pass in this object; in that case, VectorStateTable takes its 117// ownership. 118template <class T, class FP> 119class VectorStateTable 120 : public VectorBiTable<typename T::StateId, T, FP> { 121 public: 122 typedef T StateTuple; 123 typedef typename StateTuple::StateId StateId; 124 using VectorBiTable<StateId, T, FP>::FindId; 125 using VectorBiTable<StateId, T, FP>::FindEntry; 126 using VectorBiTable<StateId, T, FP>::Size; 127 using VectorBiTable<StateId, T, FP>::Fingerprint; 128 129 // Reserves space for 'table_size' elements. 130 explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) 131 : VectorBiTable<StateId, T, FP>(fp, table_size) {} 132 133 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 134 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 135}; 136 137 138// An implementation using a vector and a compact hash table. The 139// selecting functor S returns true for tuples to be hashed in the 140// vector. The fingerprinting functor FP returns a unique fingerprint 141// for each tuple to be hashed in the vector (these need to be 142// suitable for indexing in a vector). The hash functor H is used when 143// hashing tuple into the compact hash table. 144template <class T, class S, class FP, class H> 145class VectorHashStateTable 146 : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { 147 public: 148 typedef T StateTuple; 149 typedef typename StateTuple::StateId StateId; 150 using VectorHashBiTable<StateId, T, S, FP, H>::FindId; 151 using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; 152 using VectorHashBiTable<StateId, T, S, FP, H>::Size; 153 using VectorHashBiTable<StateId, T, S, FP, H>::Selector; 154 using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; 155 using VectorHashBiTable<StateId, T, S, FP, H>::Hash; 156 157 VectorHashStateTable(S *s, FP *fp, H *h, 158 size_t vector_size = 0, 159 size_t tuple_size = 0) 160 : VectorHashBiTable<StateId, T, S, FP, H>( 161 s, fp, h, vector_size, tuple_size) {} 162 163 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 164 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 165}; 166 167 168// An implementation using a hash map for the tuple to state ID 169// mapping. This version permits erasing of states. The state tuple T 170// must have == defined and its default constructor must produce a 171// tuple that will never be seen. F is the hash function. 172template <class T, class F> 173class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { 174 public: 175 typedef T StateTuple; 176 typedef typename StateTuple::StateId StateId; 177 using ErasableBiTable<StateId, T, F>::FindId; 178 using ErasableBiTable<StateId, T, F>::FindEntry; 179 using ErasableBiTable<StateId, T, F>::Size; 180 using ErasableBiTable<StateId, T, F>::Erase; 181 182 ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} 183 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 184 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 185}; 186 187// 188// COMPOSITION STATE TUPLES AND TABLES 189// 190// The composition state table has the form: 191// 192// template <class A, class F> 193// class ComposeStateTable { 194// public: 195// typedef A Arc; 196// typedef F FilterState; 197// typedef typename A::StateId StateId; 198// typedef ComposeStateTuple<StateId> StateTuple; 199// 200// // Required constructors. Copy constructor does not copy state. 201// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); 202// ComposeStateTable(const ComposeStateTable<A, F> &table); 203// // Lookup state ID by tuple. If it doesn't exist, then add it. 204// StateId FindState(const StateTuple &); 205// // Lookup state tuple by state ID. 206// const StateTuple<StateId> &Tuple(StateId) const; 207// // # of stored tuples. 208// StateId Size() const; 209// // Return true if error encountered 210// bool Error() const; 211// }; 212 213// Represents the composition state. 214template <typename S, typename F> 215struct ComposeStateTuple { 216 typedef S StateId; 217 typedef F FilterState; 218 219 ComposeStateTuple() 220 : state_id1(kNoStateId), state_id2(kNoStateId), 221 filter_state(FilterState::NoState()) {} 222 223 ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) 224 : state_id1(s1), state_id2(s2), filter_state(f) {} 225 226 StateId state_id1; // State Id on fst1 227 StateId state_id2; // State Id on fst2 228 FilterState filter_state; // State of composition filter 229}; 230 231// Equality of composition state tuples. 232template <typename S, typename F> 233inline bool operator==(const ComposeStateTuple<S, F>& x, 234 const ComposeStateTuple<S, F>& y) { 235 if (&x == &y) 236 return true; 237 return x.state_id1 == y.state_id1 && 238 x.state_id2 == y.state_id2 && 239 x.filter_state == y.filter_state; 240} 241 242 243// Hashing of composition state tuples. 244template <typename S, typename F> 245class ComposeHash { 246 public: 247 size_t operator()(const ComposeStateTuple<S, F>& t) const { 248 return t.state_id1 + t.state_id2 * kPrime0 + 249 t.filter_state.Hash() * kPrime1; 250 } 251 private: 252 static const size_t kPrime0; 253 static const size_t kPrime1; 254}; 255 256template <typename S, typename F> 257const size_t ComposeHash<S, F>::kPrime0 = 7853; 258 259template <typename S, typename F> 260const size_t ComposeHash<S, F>::kPrime1 = 7867; 261 262 263// A HashStateTable over composition tuples. 264template <typename A, 265 typename F, 266 typename H = 267 CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, 268 ComposeHash<typename A::StateId, F> > > 269class GenericComposeStateTable : public H { 270 public: 271 typedef A Arc; 272 typedef typename A::StateId StateId; 273 typedef F FilterState; 274 typedef ComposeStateTuple<StateId, F> StateTuple; 275 276 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 277 278 // Reserves space for 'table_size' elements. 279 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, 280 size_t table_size) : H(table_size) {} 281 282 bool Error() const { return false; } 283 284 private: 285 void operator=(const GenericComposeStateTable<A, F> &table); // disallow 286}; 287 288 289// Fingerprint for general composition tuples. 290template <typename S, typename F> 291class ComposeFingerprint { 292 public: 293 typedef S StateId; 294 typedef F FilterState; 295 typedef ComposeStateTuple<S, F> StateTuple; 296 297 // Required but suboptimal constructor. 298 ComposeFingerprint() : mult1_(8192), mult2_(8192) { 299 LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; 300 } 301 302 // Constructor is provided the sizes of the input FSTs 303 ComposeFingerprint(StateId nstates1, StateId nstates2) 304 : mult1_(nstates1), mult2_(nstates1 * nstates2) { } 305 306 size_t operator()(const StateTuple &tuple) { 307 return tuple.state_id1 + tuple.state_id2 * mult1_ + 308 tuple.filter_state.Hash() * mult2_; 309 } 310 311 private: 312 ssize_t mult1_; 313 ssize_t mult2_; 314}; 315 316 317// Useful when the first composition state determines the tuple. 318template <typename S, typename F> 319class ComposeState1Fingerprint { 320 public: 321 typedef S StateId; 322 typedef F FilterState; 323 typedef ComposeStateTuple<S, F> StateTuple; 324 325 size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } 326}; 327 328 329// Useful when the second composition state determines the tuple. 330template <typename S, typename F> 331class ComposeState2Fingerprint { 332 public: 333 typedef S StateId; 334 typedef F FilterState; 335 typedef ComposeStateTuple<S, F> StateTuple; 336 337 size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } 338}; 339 340 341// A VectorStateTable over composition tuples. This can be used when 342// the product of number of states in FST1 and FST2 (and the 343// composition filter state hash) is manageable. If the FSTs are not 344// expanded Fsts, they will first have their states counted. 345template <typename A, typename F> 346class ProductComposeStateTable : public 347VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 348 ComposeFingerprint<typename A::StateId, F> > { 349 public: 350 typedef A Arc; 351 typedef typename A::StateId StateId; 352 typedef F FilterState; 353 typedef ComposeStateTuple<StateId, F> StateTuple; 354 typedef VectorStateTable<StateTuple, 355 ComposeFingerprint<StateId, F> > StateTable; 356 357 // Reserves space for 'table_size' elements. 358 ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, 359 size_t table_size = 0) 360 : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1), 361 CountStates(fst2)), 362 table_size) {} 363 364 ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) 365 : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} 366 367 bool Error() const { return false; } 368 369 private: 370 void operator=(const ProductComposeStateTable<A, F> &table); // disallow 371}; 372 373// A VectorStateTable over composition tuples. This can be used when 374// FST1 is a string (satisfies kStringProperties) and FST2 is 375// epsilon-free and deterministic. It should be used with a 376// composition filter that creates at most one filter state per tuple 377// under these conditions (e.g. SequenceComposeFilter or 378// MatchComposeFilter). 379template <typename A, typename F> 380class StringDetComposeStateTable : public 381VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 382 ComposeState1Fingerprint<typename A::StateId, F> > { 383 public: 384 typedef A Arc; 385 typedef typename A::StateId StateId; 386 typedef F FilterState; 387 typedef ComposeStateTuple<StateId, F> StateTuple; 388 typedef VectorStateTable<StateTuple, 389 ComposeState1Fingerprint<StateId, F> > StateTable; 390 391 StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 392 : error_(false) { 393 uint64 props1 = kString; 394 uint64 props2 = kIDeterministic | kNoIEpsilons; 395 if (fst1.Properties(props1, true) != props1 || 396 fst2.Properties(props2, true) != props2) { 397 FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" 398 << " fst2 not input deterministic and epsilon-free"; 399 error_ = true; 400 } 401 } 402 403 StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) 404 : StateTable(table), error_(table.error_) {} 405 406 bool Error() const { return error_; } 407 408 private: 409 bool error_; 410 411 void operator=(const StringDetComposeStateTable<A, F> &table); // disallow 412}; 413 414 415// A VectorStateTable over composition tuples. This can be used when 416// FST2 is a string (satisfies kStringProperties) and FST1 is 417// epsilon-free and deterministic. It should be used with a 418// composition filter that creates at most one filter state per tuple 419// under these conditions (e.g. SequenceComposeFilter or 420// MatchComposeFilter). 421template <typename A, typename F> 422class DetStringComposeStateTable : public 423VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 424 ComposeState2Fingerprint<typename A::StateId, F> > { 425 public: 426 typedef A Arc; 427 typedef typename A::StateId StateId; 428 typedef F FilterState; 429 typedef ComposeStateTuple<StateId, F> StateTuple; 430 typedef VectorStateTable<StateTuple, 431 ComposeState2Fingerprint<StateId, F> > StateTable; 432 433 DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 434 :error_(false) { 435 uint64 props1 = kODeterministic | kNoOEpsilons; 436 uint64 props2 = kString; 437 if (fst1.Properties(props1, true) != props1 || 438 fst2.Properties(props2, true) != props2) { 439 FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" 440 << " fst1 not output deterministic and epsilon-free"; 441 error_ = true; 442 } 443 } 444 445 DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) 446 : StateTable(table), error_(table.error_) {} 447 448 bool Error() const { return error_; } 449 450 private: 451 bool error_; 452 453 void operator=(const DetStringComposeStateTable<A, F> &table); // disallow 454}; 455 456 457// An ErasableStateTable over composition tuples. The Erase(StateId) method 458// can be called if the user either is sure that composition will never return 459// to that tuple or doesn't care that if it does, it is assigned a new 460// state ID. 461template <typename A, typename F> 462class ErasableComposeStateTable : public 463ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, 464 ComposeHash<typename A::StateId, F> > { 465 public: 466 typedef A Arc; 467 typedef typename A::StateId StateId; 468 typedef F FilterState; 469 typedef ComposeStateTuple<StateId, F> StateTuple; 470 471 ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 472 473 bool Error() const { return false; } 474 475 private: 476 void operator=(const ErasableComposeStateTable<A, F> &table); // disallow 477}; 478 479} // namespace fst 480 481#endif // FST_LIB_STATE_TABLE_H__ 482