state-table.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// state-table.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Classes for representing the mapping between state tuples and state Ids. 20 21#ifndef FST_LIB_STATE_TABLE_H__ 22#define FST_LIB_STATE_TABLE_H__ 23 24#include <deque> 25#include <vector> 26using std::vector; 27 28#include <fst/bi-table.h> 29#include <fst/expanded-fst.h> 30 31 32namespace fst { 33 34// STATE TABLES - these determine the bijective mapping between state 35// tuples (e.g. in composition triples of two FST states and a 36// composition filter state) and their corresponding state IDs. 37// They are classes, templated on state tuples, of the form: 38// 39// template <class T> 40// class StateTable { 41// public: 42// typedef typename T StateTuple; 43// 44// // Required constructors. 45// StateTable(); 46// 47// // Lookup state ID by tuple. If it doesn't exist, then add it. 48// StateId FindState(const StateTuple &); 49// // Lookup state tuple by state ID. 50// const StateTuple<StateId> &Tuple(StateId) const; 51// // # of stored tuples. 52// StateId Size() const; 53// }; 54// 55// A state tuple has the form: 56// 57// template <class S> 58// struct StateTuple { 59// typedef typename S StateId; 60// 61// // Required constructor. 62// StateTuple(); 63// }; 64 65 66// An implementation using a hash map for the tuple to state ID mapping. 67// The state tuple T must have == defined and the default constructor 68// must produce a tuple that will never be seen. H is the hash function. 69template <class T, class H> 70class HashStateTable : public HashBiTable<typename T::StateId, T, H> { 71 public: 72 typedef T StateTuple; 73 typedef typename StateTuple::StateId StateId; 74 using HashBiTable<StateId, T, H>::FindId; 75 using HashBiTable<StateId, T, H>::FindEntry; 76 using HashBiTable<StateId, T, H>::Size; 77 78 HashStateTable() : HashBiTable<StateId, T, H>() {} 79 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 80 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 81}; 82 83 84// An implementation using a hash set for the tuple to state ID 85// mapping. The state tuple T must have == defined and the default 86// constructor must produce a tuple that will never be seen. H is the 87// hash function. 88template <class T, class H> 89class CompactHashStateTable 90 : public CompactHashBiTable<typename T::StateId, T, H> { 91 public: 92 typedef T StateTuple; 93 typedef typename StateTuple::StateId StateId; 94 using CompactHashBiTable<StateId, T, H>::FindId; 95 using CompactHashBiTable<StateId, T, H>::FindEntry; 96 using CompactHashBiTable<StateId, T, H>::Size; 97 98 CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} 99 100 // Reserves space for table_size elements. 101 explicit CompactHashStateTable(size_t table_size) 102 : CompactHashBiTable<StateId, T, H>(table_size) {} 103 104 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 105 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 106}; 107 108// An implementation using a vector for the tuple to state mapping. 109// It is passed a function object FP that should fingerprint tuples 110// uniquely to an integer that can used as a vector index. Normally, 111// VectorStateTable constructs the FP object. The user can instead 112// pass in this object; in that case, VectorStateTable takes its 113// ownership. 114template <class T, class FP> 115class VectorStateTable 116 : public VectorBiTable<typename T::StateId, T, FP> { 117 public: 118 typedef T StateTuple; 119 typedef typename StateTuple::StateId StateId; 120 using VectorBiTable<StateId, T, FP>::FindId; 121 using VectorBiTable<StateId, T, FP>::FindEntry; 122 using VectorBiTable<StateId, T, FP>::Size; 123 using VectorBiTable<StateId, T, FP>::Fingerprint; 124 125 explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {} 126 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 127 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 128}; 129 130 131// An implementation using a vector and a compact hash table. The 132// selecting functor S returns true for tuples to be hashed in the 133// vector. The fingerprinting functor FP returns a unique fingerprint 134// for each tuple to be hashed in the vector (these need to be 135// suitable for indexing in a vector). The hash functor H is used when 136// hashing tuple into the compact hash table. 137template <class T, class S, class FP, class H> 138class VectorHashStateTable 139 : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { 140 public: 141 typedef T StateTuple; 142 typedef typename StateTuple::StateId StateId; 143 using VectorHashBiTable<StateId, T, S, FP, H>::FindId; 144 using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; 145 using VectorHashBiTable<StateId, T, S, FP, H>::Size; 146 using VectorHashBiTable<StateId, T, S, FP, H>::Selector; 147 using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; 148 using VectorHashBiTable<StateId, T, S, FP, H>::Hash; 149 150 VectorHashStateTable(S *s, FP *fp, H *h, 151 size_t vector_size = 0, 152 size_t tuple_size = 0) 153 : VectorHashBiTable<StateId, T, S, FP, H>( 154 s, fp, h, vector_size, tuple_size) {} 155 156 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 157 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 158}; 159 160 161// An implementation using a hash map for the tuple to state ID 162// mapping. This version permits erasing of states. The state tuple T 163// must have == defined and its default constructor must produce a 164// tuple that will never be seen. F is the hash function. 165template <class T, class F> 166class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { 167 public: 168 typedef T StateTuple; 169 typedef typename StateTuple::StateId StateId; 170 using ErasableBiTable<StateId, T, F>::FindId; 171 using ErasableBiTable<StateId, T, F>::FindEntry; 172 using ErasableBiTable<StateId, T, F>::Size; 173 using ErasableBiTable<StateId, T, F>::Erase; 174 175 ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} 176 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } 177 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 178}; 179 180// 181// COMPOSITION STATE TUPLES AND TABLES 182// 183// The composition state table has the form: 184// 185// template <class A, class F> 186// class ComposeStateTable { 187// public: 188// typedef A Arc; 189// typedef F FilterState; 190// typedef typename A::StateId StateId; 191// typedef ComposeStateTuple<StateId> StateTuple; 192// 193// // Required constructors. Copy constructor does not copy state. 194// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); 195// ComposeStateTable(const ComposeStateTable<A, F> &table); 196// // Lookup state ID by tuple. If it doesn't exist, then add it. 197// StateId FindState(const StateTuple &); 198// // Lookup state tuple by state ID. 199// const StateTuple<StateId> &Tuple(StateId) const; 200// // # of stored tuples. 201// StateId Size() const; 202// // Return true if error encountered 203// bool Error() const; 204// }; 205 206// Represents the composition state. 207template <typename S, typename F> 208struct ComposeStateTuple { 209 typedef S StateId; 210 typedef F FilterState; 211 212 ComposeStateTuple() 213 : state_id1(kNoStateId), state_id2(kNoStateId), 214 filter_state(FilterState::NoState()) {} 215 216 ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) 217 : state_id1(s1), state_id2(s2), filter_state(f) {} 218 219 StateId state_id1; // State Id on fst1 220 StateId state_id2; // State Id on fst2 221 FilterState filter_state; // State of composition filter 222}; 223 224// Equality of composition state tuples. 225template <typename S, typename F> 226inline bool operator==(const ComposeStateTuple<S, F>& x, 227 const ComposeStateTuple<S, F>& y) { 228 if (&x == &y) 229 return true; 230 return x.state_id1 == y.state_id1 && 231 x.state_id2 == y.state_id2 && 232 x.filter_state == y.filter_state; 233} 234 235 236// Hashing of composition state tuples. 237template <typename S, typename F> 238class ComposeHash { 239 public: 240 size_t operator()(const ComposeStateTuple<S, F>& t) const { 241 return t.state_id1 + t.state_id2 * kPrime0 + 242 t.filter_state.Hash() * kPrime1; 243 } 244 private: 245 static const size_t kPrime0; 246 static const size_t kPrime1; 247}; 248 249template <typename S, typename F> 250const size_t ComposeHash<S, F>::kPrime0 = 7853; 251 252template <typename S, typename F> 253const size_t ComposeHash<S, F>::kPrime1 = 7867; 254 255 256// A HashStateTable over composition tuples. 257template <typename A, 258 typename F, 259 typename H = 260 CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, 261 ComposeHash<typename A::StateId, F> > > 262class GenericComposeStateTable : public H { 263 public: 264 typedef A Arc; 265 typedef typename A::StateId StateId; 266 typedef F FilterState; 267 typedef ComposeStateTuple<StateId, F> StateTuple; 268 269 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 270 271 GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {} 272 273 bool Error() const { return false; } 274 275 private: 276 void operator=(const GenericComposeStateTable<A, F> &table); // disallow 277}; 278 279 280// Fingerprint for general composition tuples. 281template <typename S, typename F> 282class ComposeFingerprint { 283 public: 284 typedef S StateId; 285 typedef F FilterState; 286 typedef ComposeStateTuple<S, F> StateTuple; 287 288 // Required but suboptimal constructor. 289 ComposeFingerprint() : mult1_(8192), mult2_(8192) { 290 LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; 291 } 292 293 // Constructor is provided the sizes of the input FSTs 294 ComposeFingerprint(StateId nstates1, StateId nstates2) 295 : mult1_(nstates1), mult2_(nstates1 * nstates2) { } 296 297 size_t operator()(const StateTuple &tuple) { 298 return tuple.state_id1 + tuple.state_id2 * mult1_ + 299 tuple.filter_state.Hash() * mult2_; 300 } 301 302 private: 303 ssize_t mult1_; 304 ssize_t mult2_; 305}; 306 307 308// Useful when the first composition state determines the tuple. 309template <typename S, typename F> 310class ComposeState1Fingerprint { 311 public: 312 typedef S StateId; 313 typedef F FilterState; 314 typedef ComposeStateTuple<S, F> StateTuple; 315 316 size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } 317}; 318 319 320// Useful when the second composition state determines the tuple. 321template <typename S, typename F> 322class ComposeState2Fingerprint { 323 public: 324 typedef S StateId; 325 typedef F FilterState; 326 typedef ComposeStateTuple<S, F> StateTuple; 327 328 size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } 329}; 330 331 332// A VectorStateTable over composition tuples. This can be used when 333// the product of number of states in FST1 and FST2 (and the 334// composition filter state hash) is manageable. If the FSTs are not 335// expanded Fsts, they will first have their states counted. 336template <typename A, typename F> 337class ProductComposeStateTable : public 338VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 339 ComposeFingerprint<typename A::StateId, F> > { 340 public: 341 typedef A Arc; 342 typedef typename A::StateId StateId; 343 typedef F FilterState; 344 typedef ComposeStateTuple<StateId, F> StateTuple; 345 346 ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 347 : VectorStateTable<ComposeStateTuple<StateId, F>, 348 ComposeFingerprint<StateId, F> > 349 (new ComposeFingerprint<StateId, F>(CountStates(fst1), 350 CountStates(fst2))) { } 351 352 ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) 353 : VectorStateTable<ComposeStateTuple<StateId, F>, 354 ComposeFingerprint<StateId, F> > 355 (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} 356 357 bool Error() const { return false; } 358 359 private: 360 void operator=(const ProductComposeStateTable<A, F> &table); // disallow 361}; 362 363// A VectorStateTable over composition tuples. This can be used when 364// FST1 is a string (satisfies kStringProperties) and FST2 is 365// epsilon-free and deterministic. It should be used with a 366// composition filter that creates at most one filter state per tuple 367// under these conditions (e.g. SequenceComposeFilter or 368// MatchComposeFilter). 369template <typename A, typename F> 370class StringDetComposeStateTable : public 371VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 372 ComposeState1Fingerprint<typename A::StateId, F> > { 373 public: 374 typedef A Arc; 375 typedef typename A::StateId StateId; 376 typedef F FilterState; 377 typedef ComposeStateTuple<StateId, F> StateTuple; 378 379 StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 380 : error_(false) { 381 uint64 props1 = kString; 382 uint64 props2 = kIDeterministic | kNoIEpsilons; 383 if (fst1.Properties(props1, true) != props1 || 384 fst2.Properties(props2, true) != props2) { 385 FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" 386 << " fst2 not input deterministic and epsilon-free"; 387 error_ = true; 388 } 389 } 390 391 StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) 392 : error_(table.error_) {} 393 394 bool Error() const { return error_; } 395 396 private: 397 bool error_; 398 399 void operator=(const StringDetComposeStateTable<A, F> &table); // disallow 400}; 401 402 403// A VectorStateTable over composition tuples. This can be used when 404// FST2 is a string (satisfies kStringProperties) and FST1 is 405// epsilon-free and deterministic. It should be used with a 406// composition filter that creates at most one filter state per tuple 407// under these conditions (e.g. SequenceComposeFilter or 408// MatchComposeFilter). 409template <typename A, typename F> 410class DetStringComposeStateTable : public 411VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 412 ComposeState1Fingerprint<typename A::StateId, F> > { 413 public: 414 typedef A Arc; 415 typedef typename A::StateId StateId; 416 typedef F FilterState; 417 typedef ComposeStateTuple<StateId, F> StateTuple; 418 419 DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 420 :error_(false) { 421 uint64 props1 = kODeterministic | kNoOEpsilons; 422 uint64 props2 = kString; 423 if (fst1.Properties(props1, true) != props1 || 424 fst2.Properties(props2, true) != props2) { 425 FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" 426 << " fst1 not output deterministic and epsilon-free"; 427 error_ = true; 428 } 429 } 430 431 DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) 432 : error_(table.error_) {} 433 434 bool Error() const { return error_; } 435 436 private: 437 bool error_; 438 439 void operator=(const DetStringComposeStateTable<A, F> &table); // disallow 440}; 441 442 443// An ErasableStateTable over composition tuples. The Erase(StateId) method 444// can be called if the user either is sure that composition will never return 445// to that tuple or doesn't care that if it does, it is assigned a new 446// state ID. 447template <typename A, typename F> 448class ErasableComposeStateTable : public 449ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, 450 ComposeHash<typename A::StateId, F> > { 451 public: 452 typedef A Arc; 453 typedef typename A::StateId StateId; 454 typedef F FilterState; 455 typedef ComposeStateTuple<StateId, F> StateTuple; 456 457 ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 458 459 ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {} 460 461 bool Error() const { return false; } 462 463 private: 464 void operator=(const ErasableComposeStateTable<A, F> &table); // disallow 465}; 466 467} // namespace fst 468 469#endif // FST_LIB_STATE_TABLE_H__ 470