1// state-table.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Classes for representing the mapping between state tuples and state Ids.
20
21#ifndef FST_LIB_STATE_TABLE_H__
22#define FST_LIB_STATE_TABLE_H__
23
24#include <deque>
25using std::deque;
26#include <vector>
27using std::vector;
28
29#include <fst/bi-table.h>
30#include <fst/expanded-fst.h>
31
32
33namespace fst {
34
35// STATE TABLES - these determine the bijective mapping between state
36// tuples (e.g. in composition triples of two FST states and a
37// composition filter state) and their corresponding state IDs.
38// They are classes, templated on state tuples, of the form:
39//
40// template <class T>
41// class StateTable {
42//  public:
43//   typedef typename T StateTuple;
44//
45//   // Required constructors.
46//   StateTable();
47//
48//   // Lookup state ID by tuple. If it doesn't exist, then add it.
49//   StateId FindState(const StateTuple &);
50//   // Lookup state tuple by state ID.
51//   const StateTuple<StateId> &Tuple(StateId) const;
52//   // # of stored tuples.
53//   StateId Size() const;
54// };
55//
56// A state tuple has the form:
57//
58// template <class S>
59// struct StateTuple {
60//   typedef typename S StateId;
61//
62//   // Required constructors.
63//   StateTuple();
64//   StateTuple(const StateTuple &);
65// };
66
67
68// An implementation using a hash map for the tuple to state ID mapping.
69// The state tuple T must have == defined. H is the hash function.
70template <class T, class H>
71class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
72 public:
73  typedef T StateTuple;
74  typedef typename StateTuple::StateId StateId;
75  using HashBiTable<StateId, T, H>::FindId;
76  using HashBiTable<StateId, T, H>::FindEntry;
77  using HashBiTable<StateId, T, H>::Size;
78
79  HashStateTable() : HashBiTable<StateId, T, H>() {}
80
81  // Reserves space for table_size elements.
82  explicit HashStateTable(size_t table_size)
83      : HashBiTable<StateId, T, H>(table_size) {}
84
85  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
86  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
87};
88
89
90// An implementation using a hash map for the tuple to state ID mapping.
91// The state tuple T must have == defined. H is the hash function.
92template <class T, class H>
93class CompactHashStateTable
94    : public CompactHashBiTable<typename T::StateId, T, H> {
95 public:
96  typedef T StateTuple;
97  typedef typename StateTuple::StateId StateId;
98  using CompactHashBiTable<StateId, T, H>::FindId;
99  using CompactHashBiTable<StateId, T, H>::FindEntry;
100  using CompactHashBiTable<StateId, T, H>::Size;
101
102  CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
103
104  // Reserves space for 'table_size' elements.
105  explicit CompactHashStateTable(size_t table_size)
106      : CompactHashBiTable<StateId, T, H>(table_size) {}
107
108  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
109  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
110};
111
112// An implementation using a vector for the tuple to state mapping.
113// It is passed a function object FP that should fingerprint tuples
114// uniquely to an integer that can used as a vector index. Normally,
115// VectorStateTable constructs the FP object.  The user can instead
116// pass in this object; in that case, VectorStateTable takes its
117// ownership.
118template <class T, class FP>
119class VectorStateTable
120    : public VectorBiTable<typename T::StateId, T, FP> {
121 public:
122  typedef T StateTuple;
123  typedef typename StateTuple::StateId StateId;
124  using VectorBiTable<StateId, T, FP>::FindId;
125  using VectorBiTable<StateId, T, FP>::FindEntry;
126  using VectorBiTable<StateId, T, FP>::Size;
127  using VectorBiTable<StateId, T, FP>::Fingerprint;
128
129  // Reserves space for 'table_size' elements.
130  explicit VectorStateTable(FP *fp = 0, size_t table_size = 0)
131      : VectorBiTable<StateId, T, FP>(fp, table_size) {}
132
133  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
134  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
135};
136
137
138// An implementation using a vector and a compact hash table. The
139// selecting functor S returns true for tuples to be hashed in the
140// vector.  The fingerprinting functor FP returns a unique fingerprint
141// for each tuple to be hashed in the vector (these need to be
142// suitable for indexing in a vector).  The hash functor H is used when
143// hashing tuple into the compact hash table.
144template <class T, class S, class FP, class H>
145class VectorHashStateTable
146    : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
147 public:
148  typedef T StateTuple;
149  typedef typename StateTuple::StateId StateId;
150  using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
151  using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
152  using VectorHashBiTable<StateId, T, S, FP, H>::Size;
153  using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
154  using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
155  using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
156
157  VectorHashStateTable(S *s, FP *fp, H *h,
158                       size_t vector_size = 0,
159                       size_t tuple_size = 0)
160      : VectorHashBiTable<StateId, T, S, FP, H>(
161          s, fp, h, vector_size, tuple_size) {}
162
163  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
164  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
165};
166
167
168// An implementation using a hash map for the tuple to state ID
169// mapping. This version permits erasing of states.  The state tuple T
170// must have == defined and its default constructor must produce a
171// tuple that will never be seen. F is the hash function.
172template <class T, class F>
173class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
174 public:
175  typedef T StateTuple;
176  typedef typename StateTuple::StateId StateId;
177  using ErasableBiTable<StateId, T, F>::FindId;
178  using ErasableBiTable<StateId, T, F>::FindEntry;
179  using ErasableBiTable<StateId, T, F>::Size;
180  using ErasableBiTable<StateId, T, F>::Erase;
181
182  ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
183  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
184  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
185};
186
187//
188// COMPOSITION STATE TUPLES AND TABLES
189//
190// The composition state table has the form:
191//
192// template <class A, class F>
193// class ComposeStateTable {
194//  public:
195//   typedef A Arc;
196//   typedef F FilterState;
197//   typedef typename A::StateId StateId;
198//   typedef ComposeStateTuple<StateId> StateTuple;
199//
200//   // Required constructors. Copy constructor does not copy state.
201//   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
202//   ComposeStateTable(const ComposeStateTable<A, F> &table);
203//   // Lookup state ID by tuple. If it doesn't exist, then add it.
204//   StateId FindState(const StateTuple &);
205//   // Lookup state tuple by state ID.
206//   const StateTuple<StateId> &Tuple(StateId) const;
207//   // # of stored tuples.
208//   StateId Size() const;
209//   // Return true if error encountered
210//   bool Error() const;
211// };
212
213// Represents the composition state.
214template <typename S, typename F>
215struct ComposeStateTuple {
216  typedef S StateId;
217  typedef F FilterState;
218
219  ComposeStateTuple()
220      : state_id1(kNoStateId), state_id2(kNoStateId),
221        filter_state(FilterState::NoState()) {}
222
223  ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
224      : state_id1(s1), state_id2(s2), filter_state(f) {}
225
226  StateId state_id1;         // State Id on fst1
227  StateId state_id2;         // State Id on fst2
228  FilterState filter_state;  // State of composition filter
229};
230
231// Equality of composition state tuples.
232template <typename S, typename F>
233inline bool operator==(const ComposeStateTuple<S, F>& x,
234                       const ComposeStateTuple<S, F>& y) {
235  if (&x == &y)
236    return true;
237  return x.state_id1 == y.state_id1 &&
238      x.state_id2 == y.state_id2 &&
239      x.filter_state == y.filter_state;
240}
241
242
243// Hashing of composition state tuples.
244template <typename S, typename F>
245class ComposeHash {
246 public:
247  size_t operator()(const ComposeStateTuple<S, F>& t) const {
248    return t.state_id1 + t.state_id2 * kPrime0 +
249        t.filter_state.Hash() * kPrime1;
250  }
251 private:
252  static const size_t kPrime0;
253  static const size_t kPrime1;
254};
255
256template <typename S, typename F>
257const size_t ComposeHash<S, F>::kPrime0 = 7853;
258
259template <typename S, typename F>
260const size_t ComposeHash<S, F>::kPrime1 = 7867;
261
262
263// A HashStateTable over composition tuples.
264template <typename A,
265          typename F,
266          typename H =
267          CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
268                                ComposeHash<typename A::StateId, F> > >
269class GenericComposeStateTable : public H {
270 public:
271  typedef A Arc;
272  typedef typename A::StateId StateId;
273  typedef F FilterState;
274  typedef ComposeStateTuple<StateId, F> StateTuple;
275
276  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
277
278  // Reserves space for 'table_size' elements.
279  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
280                           size_t table_size) : H(table_size) {}
281
282  bool Error() const { return false; }
283
284 private:
285  void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
286};
287
288
289//  Fingerprint for general composition tuples.
290template  <typename S, typename F>
291class ComposeFingerprint {
292 public:
293  typedef S StateId;
294  typedef F FilterState;
295  typedef ComposeStateTuple<S, F> StateTuple;
296
297  // Required but suboptimal constructor.
298  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
299    LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
300  }
301
302  // Constructor is provided the sizes of the input FSTs
303  ComposeFingerprint(StateId nstates1, StateId nstates2)
304      : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
305
306  size_t operator()(const StateTuple &tuple) {
307    return tuple.state_id1 + tuple.state_id2 * mult1_ +
308        tuple.filter_state.Hash() * mult2_;
309  }
310
311 private:
312  ssize_t mult1_;
313  ssize_t mult2_;
314};
315
316
317// Useful when the first composition state determines the tuple.
318template  <typename S, typename F>
319class ComposeState1Fingerprint {
320 public:
321  typedef S StateId;
322  typedef F FilterState;
323  typedef ComposeStateTuple<S, F> StateTuple;
324
325  size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
326};
327
328
329// Useful when the second composition state determines the tuple.
330template  <typename S, typename F>
331class ComposeState2Fingerprint {
332 public:
333  typedef S StateId;
334  typedef F FilterState;
335  typedef ComposeStateTuple<S, F> StateTuple;
336
337  size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
338};
339
340
341// A VectorStateTable over composition tuples.  This can be used when
342// the product of number of states in FST1 and FST2 (and the
343// composition filter state hash) is manageable. If the FSTs are not
344// expanded Fsts, they will first have their states counted.
345template  <typename A, typename F>
346class ProductComposeStateTable : public
347VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
348                 ComposeFingerprint<typename A::StateId, F> > {
349 public:
350  typedef A Arc;
351  typedef typename A::StateId StateId;
352  typedef F FilterState;
353  typedef ComposeStateTuple<StateId, F> StateTuple;
354  typedef VectorStateTable<StateTuple,
355                           ComposeFingerprint<StateId, F> > StateTable;
356
357  // Reserves space for 'table_size' elements.
358  ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2,
359                           size_t table_size = 0)
360      : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1),
361                                                      CountStates(fst2)),
362                   table_size) {}
363
364  ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
365      : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
366
367  bool Error() const { return false; }
368
369 private:
370  void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
371};
372
373// A VectorStateTable over composition tuples.  This can be used when
374// FST1 is a string (satisfies kStringProperties) and FST2 is
375// epsilon-free and deterministic. It should be used with a
376// composition filter that creates at most one filter state per tuple
377// under these conditions (e.g. SequenceComposeFilter or
378// MatchComposeFilter).
379template <typename A, typename F>
380class StringDetComposeStateTable : public
381VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
382                 ComposeState1Fingerprint<typename A::StateId, F> > {
383 public:
384  typedef A Arc;
385  typedef typename A::StateId StateId;
386  typedef F FilterState;
387  typedef ComposeStateTuple<StateId, F> StateTuple;
388  typedef VectorStateTable<StateTuple,
389                           ComposeState1Fingerprint<StateId, F> > StateTable;
390
391  StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
392      : error_(false) {
393    uint64 props1 = kString;
394    uint64 props2 = kIDeterministic | kNoIEpsilons;
395    if (fst1.Properties(props1, true) != props1 ||
396        fst2.Properties(props2, true) != props2) {
397      FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
398                 << " fst2 not input deterministic and epsilon-free";
399      error_ = true;
400    }
401  }
402
403  StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
404      : StateTable(table), error_(table.error_) {}
405
406  bool Error() const { return error_; }
407
408 private:
409  bool error_;
410
411  void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
412};
413
414
415// A VectorStateTable over composition tuples.  This can be used when
416// FST2 is a string (satisfies kStringProperties) and FST1 is
417// epsilon-free and deterministic. It should be used with a
418// composition filter that creates at most one filter state per tuple
419// under these conditions (e.g. SequenceComposeFilter or
420// MatchComposeFilter).
421template <typename A, typename F>
422class DetStringComposeStateTable : public
423VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
424                 ComposeState2Fingerprint<typename A::StateId, F> > {
425 public:
426  typedef A Arc;
427  typedef typename A::StateId StateId;
428  typedef F FilterState;
429  typedef ComposeStateTuple<StateId, F> StateTuple;
430  typedef VectorStateTable<StateTuple,
431                           ComposeState2Fingerprint<StateId, F> > StateTable;
432
433  DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
434      :error_(false) {
435    uint64 props1 = kODeterministic | kNoOEpsilons;
436    uint64 props2 = kString;
437    if (fst1.Properties(props1, true) != props1 ||
438        fst2.Properties(props2, true) != props2) {
439      FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
440                 << " fst1 not output deterministic and epsilon-free";
441      error_ = true;
442    }
443  }
444
445  DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
446      : StateTable(table), error_(table.error_) {}
447
448  bool Error() const { return error_; }
449
450 private:
451  bool error_;
452
453  void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
454};
455
456
457// An ErasableStateTable over composition tuples. The Erase(StateId) method
458// can be called if the user either is sure that composition will never return
459// to that tuple or doesn't care that if it does, it is assigned a new
460// state ID.
461template <typename A, typename F>
462class ErasableComposeStateTable : public
463ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
464                   ComposeHash<typename A::StateId, F> > {
465 public:
466  typedef A Arc;
467  typedef typename A::StateId StateId;
468  typedef F FilterState;
469  typedef ComposeStateTuple<StateId, F> StateTuple;
470
471  ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
472
473  bool Error() const { return false; }
474
475 private:
476  void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
477};
478
479}  // namespace fst
480
481#endif  // FST_LIB_STATE_TABLE_H__
482