state-table.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// state-table.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Classes for representing the mapping between state tuples and state Ids.
20
21#ifndef FST_LIB_STATE_TABLE_H__
22#define FST_LIB_STATE_TABLE_H__
23
24#include <deque>
25#include <vector>
26using std::vector;
27
28#include <fst/bi-table.h>
29#include <fst/expanded-fst.h>
30
31
32namespace fst {
33
34// STATE TABLES - these determine the bijective mapping between state
35// tuples (e.g. in composition triples of two FST states and a
36// composition filter state) and their corresponding state IDs.
37// They are classes, templated on state tuples, of the form:
38//
39// template <class T>
40// class StateTable {
41//  public:
42//   typedef typename T StateTuple;
43//
44//   // Required constructors.
45//   StateTable();
46//
47//   // Lookup state ID by tuple. If it doesn't exist, then add it.
48//   StateId FindState(const StateTuple &);
49//   // Lookup state tuple by state ID.
50//   const StateTuple<StateId> &Tuple(StateId) const;
51//   // # of stored tuples.
52//   StateId Size() const;
53// };
54//
55// A state tuple has the form:
56//
57// template <class S>
58// struct StateTuple {
59//   typedef typename S StateId;
60//
61//   // Required constructor.
62//   StateTuple();
63// };
64
65
66// An implementation using a hash map for the tuple to state ID mapping.
67// The state tuple T must have == defined and the default constructor
68// must produce a tuple that will never be seen. H is the hash function.
69template <class T, class H>
70class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
71 public:
72  typedef T StateTuple;
73  typedef typename StateTuple::StateId StateId;
74  using HashBiTable<StateId, T, H>::FindId;
75  using HashBiTable<StateId, T, H>::FindEntry;
76  using HashBiTable<StateId, T, H>::Size;
77
78  HashStateTable() : HashBiTable<StateId, T, H>() {}
79  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
80  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
81};
82
83
84// An implementation using a hash set for the tuple to state ID
85// mapping. The state tuple T must have == defined and the default
86// constructor must produce a tuple that will never be seen. H is the
87// hash function.
88template <class T, class H>
89class CompactHashStateTable
90    : public CompactHashBiTable<typename T::StateId, T, H> {
91 public:
92  typedef T StateTuple;
93  typedef typename StateTuple::StateId StateId;
94  using CompactHashBiTable<StateId, T, H>::FindId;
95  using CompactHashBiTable<StateId, T, H>::FindEntry;
96  using CompactHashBiTable<StateId, T, H>::Size;
97
98  CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
99
100  // Reserves space for table_size elements.
101  explicit CompactHashStateTable(size_t table_size)
102      : CompactHashBiTable<StateId, T, H>(table_size) {}
103
104  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
105  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
106};
107
108// An implementation using a vector for the tuple to state mapping.
109// It is passed a function object FP that should fingerprint tuples
110// uniquely to an integer that can used as a vector index. Normally,
111// VectorStateTable constructs the FP object.  The user can instead
112// pass in this object; in that case, VectorStateTable takes its
113// ownership.
114template <class T, class FP>
115class VectorStateTable
116    : public VectorBiTable<typename T::StateId, T, FP> {
117 public:
118  typedef T StateTuple;
119  typedef typename StateTuple::StateId StateId;
120  using VectorBiTable<StateId, T, FP>::FindId;
121  using VectorBiTable<StateId, T, FP>::FindEntry;
122  using VectorBiTable<StateId, T, FP>::Size;
123  using VectorBiTable<StateId, T, FP>::Fingerprint;
124
125  explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
126  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
127  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
128};
129
130
131// An implementation using a vector and a compact hash table. The
132// selecting functor S returns true for tuples to be hashed in the
133// vector.  The fingerprinting functor FP returns a unique fingerprint
134// for each tuple to be hashed in the vector (these need to be
135// suitable for indexing in a vector).  The hash functor H is used when
136// hashing tuple into the compact hash table.
137template <class T, class S, class FP, class H>
138class VectorHashStateTable
139    : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
140 public:
141  typedef T StateTuple;
142  typedef typename StateTuple::StateId StateId;
143  using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
144  using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
145  using VectorHashBiTable<StateId, T, S, FP, H>::Size;
146  using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
147  using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
148  using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
149
150  VectorHashStateTable(S *s, FP *fp, H *h,
151                       size_t vector_size = 0,
152                       size_t tuple_size = 0)
153      : VectorHashBiTable<StateId, T, S, FP, H>(
154          s, fp, h, vector_size, tuple_size) {}
155
156  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
157  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
158};
159
160
161// An implementation using a hash map for the tuple to state ID
162// mapping. This version permits erasing of states.  The state tuple T
163// must have == defined and its default constructor must produce a
164// tuple that will never be seen. F is the hash function.
165template <class T, class F>
166class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
167 public:
168  typedef T StateTuple;
169  typedef typename StateTuple::StateId StateId;
170  using ErasableBiTable<StateId, T, F>::FindId;
171  using ErasableBiTable<StateId, T, F>::FindEntry;
172  using ErasableBiTable<StateId, T, F>::Size;
173  using ErasableBiTable<StateId, T, F>::Erase;
174
175  ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
176  StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
177  const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
178};
179
180//
181// COMPOSITION STATE TUPLES AND TABLES
182//
183// The composition state table has the form:
184//
185// template <class A, class F>
186// class ComposeStateTable {
187//  public:
188//   typedef A Arc;
189//   typedef F FilterState;
190//   typedef typename A::StateId StateId;
191//   typedef ComposeStateTuple<StateId> StateTuple;
192//
193//   // Required constructors. Copy constructor does not copy state.
194//   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
195//   ComposeStateTable(const ComposeStateTable<A, F> &table);
196//   // Lookup state ID by tuple. If it doesn't exist, then add it.
197//   StateId FindState(const StateTuple &);
198//   // Lookup state tuple by state ID.
199//   const StateTuple<StateId> &Tuple(StateId) const;
200//   // # of stored tuples.
201//   StateId Size() const;
202//   // Return true if error encountered
203//   bool Error() const;
204// };
205
206// Represents the composition state.
207template <typename S, typename F>
208struct ComposeStateTuple {
209  typedef S StateId;
210  typedef F FilterState;
211
212  ComposeStateTuple()
213      : state_id1(kNoStateId), state_id2(kNoStateId),
214        filter_state(FilterState::NoState()) {}
215
216  ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
217      : state_id1(s1), state_id2(s2), filter_state(f) {}
218
219  StateId state_id1;         // State Id on fst1
220  StateId state_id2;         // State Id on fst2
221  FilterState filter_state;  // State of composition filter
222};
223
224// Equality of composition state tuples.
225template <typename S, typename F>
226inline bool operator==(const ComposeStateTuple<S, F>& x,
227                       const ComposeStateTuple<S, F>& y) {
228  if (&x == &y)
229    return true;
230  return x.state_id1 == y.state_id1 &&
231      x.state_id2 == y.state_id2 &&
232      x.filter_state == y.filter_state;
233}
234
235
236// Hashing of composition state tuples.
237template <typename S, typename F>
238class ComposeHash {
239 public:
240  size_t operator()(const ComposeStateTuple<S, F>& t) const {
241    return t.state_id1 + t.state_id2 * kPrime0 +
242        t.filter_state.Hash() * kPrime1;
243  }
244 private:
245  static const size_t kPrime0;
246  static const size_t kPrime1;
247};
248
249template <typename S, typename F>
250const size_t ComposeHash<S, F>::kPrime0 = 7853;
251
252template <typename S, typename F>
253const size_t ComposeHash<S, F>::kPrime1 = 7867;
254
255
256// A HashStateTable over composition tuples.
257template <typename A,
258          typename F,
259          typename H =
260          CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
261                                ComposeHash<typename A::StateId, F> > >
262class GenericComposeStateTable : public H {
263 public:
264  typedef A Arc;
265  typedef typename A::StateId StateId;
266  typedef F FilterState;
267  typedef ComposeStateTuple<StateId, F> StateTuple;
268
269  GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
270
271  GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
272
273  bool Error() const { return false; }
274
275 private:
276  void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
277};
278
279
280//  Fingerprint for general composition tuples.
281template  <typename S, typename F>
282class ComposeFingerprint {
283 public:
284  typedef S StateId;
285  typedef F FilterState;
286  typedef ComposeStateTuple<S, F> StateTuple;
287
288  // Required but suboptimal constructor.
289  ComposeFingerprint() : mult1_(8192), mult2_(8192) {
290    LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
291  }
292
293  // Constructor is provided the sizes of the input FSTs
294  ComposeFingerprint(StateId nstates1, StateId nstates2)
295      : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
296
297  size_t operator()(const StateTuple &tuple) {
298    return tuple.state_id1 + tuple.state_id2 * mult1_ +
299        tuple.filter_state.Hash() * mult2_;
300  }
301
302 private:
303  ssize_t mult1_;
304  ssize_t mult2_;
305};
306
307
308// Useful when the first composition state determines the tuple.
309template  <typename S, typename F>
310class ComposeState1Fingerprint {
311 public:
312  typedef S StateId;
313  typedef F FilterState;
314  typedef ComposeStateTuple<S, F> StateTuple;
315
316  size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
317};
318
319
320// Useful when the second composition state determines the tuple.
321template  <typename S, typename F>
322class ComposeState2Fingerprint {
323 public:
324  typedef S StateId;
325  typedef F FilterState;
326  typedef ComposeStateTuple<S, F> StateTuple;
327
328  size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
329};
330
331
332// A VectorStateTable over composition tuples.  This can be used when
333// the product of number of states in FST1 and FST2 (and the
334// composition filter state hash) is manageable. If the FSTs are not
335// expanded Fsts, they will first have their states counted.
336template  <typename A, typename F>
337class ProductComposeStateTable : public
338VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
339                 ComposeFingerprint<typename A::StateId, F> > {
340 public:
341  typedef A Arc;
342  typedef typename A::StateId StateId;
343  typedef F FilterState;
344  typedef ComposeStateTuple<StateId, F> StateTuple;
345
346  ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
347      : VectorStateTable<ComposeStateTuple<StateId, F>,
348                         ComposeFingerprint<StateId, F> >
349  (new ComposeFingerprint<StateId, F>(CountStates(fst1),
350                                      CountStates(fst2))) { }
351
352  ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
353      : VectorStateTable<ComposeStateTuple<StateId, F>,
354                         ComposeFingerprint<StateId, F> >
355        (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
356
357  bool Error() const { return false; }
358
359 private:
360  void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
361};
362
363// A VectorStateTable over composition tuples.  This can be used when
364// FST1 is a string (satisfies kStringProperties) and FST2 is
365// epsilon-free and deterministic. It should be used with a
366// composition filter that creates at most one filter state per tuple
367// under these conditions (e.g. SequenceComposeFilter or
368// MatchComposeFilter).
369template <typename A, typename F>
370class StringDetComposeStateTable : public
371VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
372                 ComposeState1Fingerprint<typename A::StateId, F> > {
373 public:
374  typedef A Arc;
375  typedef typename A::StateId StateId;
376  typedef F FilterState;
377  typedef ComposeStateTuple<StateId, F> StateTuple;
378
379  StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
380      : error_(false) {
381    uint64 props1 = kString;
382    uint64 props2 = kIDeterministic | kNoIEpsilons;
383    if (fst1.Properties(props1, true) != props1 ||
384        fst2.Properties(props2, true) != props2) {
385      FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
386                 << " fst2 not input deterministic and epsilon-free";
387      error_ = true;
388    }
389  }
390
391  StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
392   : error_(table.error_) {}
393
394  bool Error() const { return error_; }
395
396 private:
397  bool error_;
398
399  void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
400};
401
402
403// A VectorStateTable over composition tuples.  This can be used when
404// FST2 is a string (satisfies kStringProperties) and FST1 is
405// epsilon-free and deterministic. It should be used with a
406// composition filter that creates at most one filter state per tuple
407// under these conditions (e.g. SequenceComposeFilter or
408// MatchComposeFilter).
409template <typename A, typename F>
410class DetStringComposeStateTable : public
411VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
412                 ComposeState1Fingerprint<typename A::StateId, F> > {
413 public:
414  typedef A Arc;
415  typedef typename A::StateId StateId;
416  typedef F FilterState;
417  typedef ComposeStateTuple<StateId, F> StateTuple;
418
419  DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
420      :error_(false) {
421    uint64 props1 = kODeterministic | kNoOEpsilons;
422    uint64 props2 = kString;
423    if (fst1.Properties(props1, true) != props1 ||
424        fst2.Properties(props2, true) != props2) {
425      FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
426                 << " fst1 not output deterministic and epsilon-free";
427      error_ = true;
428    }
429  }
430
431  DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
432      : error_(table.error_) {}
433
434  bool Error() const { return error_; }
435
436 private:
437  bool error_;
438
439  void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
440};
441
442
443// An ErasableStateTable over composition tuples. The Erase(StateId) method
444// can be called if the user either is sure that composition will never return
445// to that tuple or doesn't care that if it does, it is assigned a new
446// state ID.
447template <typename A, typename F>
448class ErasableComposeStateTable : public
449ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
450                   ComposeHash<typename A::StateId, F> > {
451 public:
452  typedef A Arc;
453  typedef typename A::StateId StateId;
454  typedef F FilterState;
455  typedef ComposeStateTuple<StateId, F> StateTuple;
456
457  ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
458
459  ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
460
461  bool Error() const { return false; }
462
463 private:
464  void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
465};
466
467}  // namespace fst
468
469#endif  // FST_LIB_STATE_TABLE_H__
470