compact-fst.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// compact-fst.h
2
3
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//     http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// Copyright 2005-2010 Google, Inc.
17// Author: allauzen@google.com (Cyril Allauzen)
18//
19// \file
20// FST Class for memory-efficient representation of common types of
21// FSTs: linear automata, acceptors, unweighted FSTs, ...
22
23#ifndef FST_LIB_COMPACT_FST_H__
24#define FST_LIB_COMPACT_FST_H__
25
26#include <iterator>
27#include <utility>
28using std::pair; using std::make_pair;
29#include <vector>
30using std::vector;
31
32#include <fst/cache.h>
33#include <fst/expanded-fst.h>
34#include <fst/fst-decl.h>  // For optional argument declarations
35#include <fst/matcher.h>
36#include <fst/test-properties.h>
37#include <fst/util.h>
38
39
40namespace fst {
41
42struct CompactFstOptions : public CacheOptions {
43  // CompactFst default caching behaviour is to do no caching. Most
44  // compactors are cheap and therefore we save memory by not doing
45  // caching.
46  CompactFstOptions() : CacheOptions(true, 0) {}
47  CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
48};
49
50// Compactor Interface - class determinies how arcs and final weights
51// are compacted and expanded.
52//
53// Final weights are treated as transitions to the superfinal state,
54// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
55//
56// There are two types of compactors:
57//
58// * Fixed out-degree compactors: 'compactor.Size()' returns a
59// positive integer 's'. An FST can be compacted by this compactor
60// only if each state has exactly 's' outgoing transitions (counting a
61// non-Zero() final weight as a transition). A typical example is a
62// compactor for string FSTs, i.e. 's == 1'.
63//
64// * Variable out-degree compactors: 'compactor.Size() == -1'. There
65// are no out-degree restrictions for these compactors.
66//
67//
68// class Compactor {
69//  public:
70//   // Element is the type of the compacted transitions.
71//   typedef ... Element;
72//   // Return the compacted representation of a transition 'arc'
73//   // at a state 's'.
74//   Element Compact(StateId s, const Arc &arc);
75//   // Return the transition at state 's' represented by the compacted
76//   // transition 'e'.
77//   Arc Expand(StateId s, const Element &e);
78//   // Return -1 for variable out-degree compactors, and the mandatory
79//   // out-degree otherwise.
80//   ssize_t Size();
81//   // Test whether 'fst' can be compacted by this compactor.
82//   bool Compatible(const Fst<A> &fst);
83//   // Return the properties that are always true for an fst
84//   // compacted using this compactor
85//   uint64 Properties();
86//   // Return a string identifying the type of compactor.
87//   static const string &Type();
88//   // Write a compactor to a file.
89//   bool Write(ostream &strm);
90//   // Read a compactor from a file.
91//   static Compactor *Read(istream &strm);
92//   // Default constructor (optional, see comment below).
93//   Compactor();
94// };
95//
96// The default constructor is only required for FST_REGISTER to work
97// (i.e. enabling Convert() and the command-line utilities to work
98// with this new compactor). However, a default constructor always
99// needs to be specify for this code to compile, but one can have it
100// simply raised an error when called:
101//
102// Compactor::Compactor() {
103//   FSTERROR() << "Compactor: no default constructor";
104// }
105
106
107// Implementation data for Compact Fst, which can shared between otherwise
108// independent copies.
109//
110// The implementation contains two arrays: 'states_' and 'compacts_'.
111//
112// For fixed out-degree compactors, the 'states_' array is unallocated.
113// The 'compacts_' contains the compacted transitions. Its size is
114// 'ncompacts_'. The outgoing transitions at a given state are stored
115// consecutively. For a given state 's', its 'compactor.Size()' outgoing
116// transitions (including superfinal transition when 's' is final), are
117// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
118//
119// For variable out-degree compactors, the states_ array has size
120// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
121// For a given state 's', the compacted transitions of 's' are
122// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
123// By convention, 'states_[nstates_] == ncompacts_'.
124//
125// In both cases, the superfinal transitons (when 's' is final, i.e.
126// 'Final(s) != Weight::Zero()') is stored first.
127//
128// The unsigned type U is used to represent indices into the compacts_
129// array.
130template <class E, class U>
131class CompactFstData {
132  public:
133  typedef E CompactElement;
134  typedef U Unsigned;
135
136  CompactFstData()
137      : states_(0),
138        compacts_(0),
139        nstates_(0),
140        ncompacts_(0),
141        narcs_(0),
142        start_(kNoStateId),
143        error_(false) {}
144
145  template <class A, class Compactor>
146  CompactFstData(const Fst<A> &fst, const Compactor &compactor);
147
148  template <class Iterator, class Compactor>
149  CompactFstData(const Iterator &begin, const Iterator &end,
150                 const Compactor &compactor);
151
152  ~CompactFstData() {
153    delete[] states_;
154    delete[] compacts_;
155  }
156
157  template <class Compactor>
158  static CompactFstData<E, U> *Read(istream &strm,
159                                       const FstReadOptions &opts,
160                                       const FstHeader &hdr,
161                                       const Compactor &compactor);
162
163  bool Write(ostream &strm, const FstWriteOptions &opts) const;
164
165  Unsigned States(ssize_t i) const { return states_[i]; }
166  const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
167  size_t NumStates() const { return nstates_; }
168  size_t NumCompacts() const { return ncompacts_; }
169  size_t NumArcs() const { return narcs_; }
170  ssize_t Start() const { return start_; }
171
172  int RefCount() const { return ref_count_.count(); }
173  int IncrRefCount() { return ref_count_.Incr(); }
174  int DecrRefCount() { return ref_count_.Decr(); }
175
176  bool Error() const { return error_; }
177
178 private:
179  // Byte alignment for states and arcs in file format (version 1 only)
180  static const int kFileAlign = 16;
181
182  Unsigned *states_;
183  CompactElement *compacts_;
184  size_t nstates_;
185  size_t ncompacts_;
186  size_t narcs_;
187  ssize_t start_;
188  RefCounter ref_count_;
189  bool error_;
190};
191
192template <class E, class U>
193const int CompactFstData<E, U>::kFileAlign;
194
195
196template <class E, class U>
197template <class A, class C>
198CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
199    : states_(0),
200      compacts_(0),
201      nstates_(0),
202      ncompacts_(0),
203      narcs_(0),
204      start_(kNoStateId),
205      error_(false) {
206  typedef typename A::StateId StateId;
207  typedef typename A::Weight Weight;
208  start_ = fst.Start();
209  // Count # of states and arcs.
210  StateId nfinals = 0;
211  for (StateIterator< Fst<A> > siter(fst);
212       !siter.Done();
213       siter.Next()) {
214    ++nstates_;
215    StateId s = siter.Value();
216    for (ArcIterator< Fst<A> > aiter(fst, s);
217         !aiter.Done();
218         aiter.Next())
219      ++narcs_;
220    if (fst.Final(s) != Weight::Zero()) ++nfinals;
221  }
222  if (compactor.Size() == -1) {
223    states_ = new Unsigned[nstates_ + 1];
224    ncompacts_ = narcs_ + nfinals;
225    compacts_ = new CompactElement[ncompacts_];
226    states_[nstates_] = ncompacts_;
227  } else {
228    states_ = 0;
229    ncompacts_ = nstates_ * compactor.Size();
230    if ((narcs_ + nfinals) != ncompacts_) {
231      FSTERROR() << "CompactFstData: compactor incompatible with fst";
232      error_ = true;
233      return;
234    }
235    compacts_ = new CompactElement[ncompacts_];
236  }
237  size_t pos = 0, fpos = 0;
238  for (StateId s = 0; s < nstates_; ++s) {
239    fpos = pos;
240    if (compactor.Size() == -1)
241      states_[s] = pos;
242    if (fst.Final(s) != Weight::Zero())
243      compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
244                                                fst.Final(s), kNoStateId));
245    for (ArcIterator< Fst<A> > aiter(fst, s);
246         !aiter.Done();
247         aiter.Next()) {
248      compacts_[pos++] = compactor.Compact(s, aiter.Value());
249    }
250    if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
251      FSTERROR() << "CompactFstData: compactor incompatible with fst";
252      error_ = true;
253      return;
254    }
255  }
256  if (pos != ncompacts_) {
257    FSTERROR() << "CompactFstData: compactor incompatible with fst";
258    error_ = true;
259    return;
260  }
261}
262
263template <class E, class U>
264template <class Iterator, class C>
265CompactFstData<E, U>::CompactFstData(const Iterator &begin,
266                                     const Iterator &end,
267                                     const C &compactor)
268    : states_(0),
269      compacts_(0),
270      nstates_(0),
271      ncompacts_(0),
272      narcs_(0),
273      start_(kNoStateId),
274      error_(false) {
275  typedef typename C::Arc Arc;
276  typedef typename Arc::Weight Weight;
277  if (compactor.Size() != -1) {
278    ncompacts_ = distance(begin, end);
279    if (compactor.Size() == 1) {
280      // For strings, allow implicit final weight.
281      // Empty input is the empty string.
282      if (ncompacts_ == 0) {
283        ++ncompacts_;
284      } else {
285        Arc arc = compactor.Expand(ncompacts_ - 1,
286                                      *(begin + (ncompacts_ - 1)));
287        if (arc.ilabel != kNoLabel)
288          ++ncompacts_;
289      }
290    }
291    if (ncompacts_ % compactor.Size()) {
292      FSTERROR() << "CompactFstData: size of input container incompatible"
293                 << " with compactor";
294      error_ = true;
295      return;
296    }
297    if (ncompacts_ == 0)
298      return;
299    start_ = 0;
300    nstates_ = ncompacts_ / compactor.Size();
301    compacts_ = new CompactElement[ncompacts_];
302    size_t i = 0;
303    Iterator it = begin;
304    for(; it != end; ++it, ++i){
305      compacts_[i] = *it;
306      if (compactor.Expand(i, *it).ilabel != kNoLabel)
307        ++narcs_;
308    }
309    if (i < ncompacts_)
310      compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
311                                              Weight::One(), kNoStateId));
312  } else {
313    if (distance(begin, end) == 0)
314      return;
315    // Count # of states, arcs and compacts.
316    Iterator it = begin;
317    for(size_t i = 0; it != end; ++it, ++i) {
318      Arc arc = compactor.Expand(i, *it);
319      if (arc.ilabel != kNoLabel) {
320        ++narcs_;
321        ++ncompacts_;
322      } else {
323        ++nstates_;
324        if (arc.weight != Weight::Zero())
325          ++ncompacts_;
326      }
327    }
328    start_ = 0;
329    compacts_ = new CompactElement[ncompacts_];
330    states_ = new Unsigned[nstates_ + 1];
331    states_[nstates_] = ncompacts_;
332    size_t i = 0, s = 0;
333    for(it = begin; it != end; ++it) {
334      Arc arc = compactor.Expand(i, *it);
335      if (arc.ilabel != kNoLabel) {
336        compacts_[i++] = *it;
337      } else {
338        states_[s++] = i;
339        if (arc.weight != Weight::Zero())
340          compacts_[i++] = *it;
341      }
342    }
343    if ((s != nstates_) || (i != ncompacts_)) {
344      FSTERROR() << "CompactFstData: ill-formed input container";
345      error_ = true;
346      return;
347    }
348  }
349}
350
351template <class E, class U>
352template <class C>
353CompactFstData<E, U> *CompactFstData<E, U>::Read(
354    istream &strm,
355    const FstReadOptions &opts,
356    const FstHeader &hdr,
357    const C &compactor) {
358  CompactFstData<E, U> *data = new CompactFstData<E, U>();
359  data->start_ = hdr.Start();
360  data->nstates_ = hdr.NumStates();
361  data->narcs_ = hdr.NumArcs();
362
363  if (compactor.Size() == -1) {
364    data->states_ = new Unsigned[data->nstates_ + 1];
365    if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
366        !AlignInput(strm, kFileAlign)) {
367      LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
368      delete data;
369      return 0;
370    }
371    // TODO: memory map this
372    size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
373    strm.read(reinterpret_cast<char *>(data->states_), b);
374    if (!strm) {
375      LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
376      delete data;
377      return 0;
378    }
379  } else {
380    data->states_ = 0;
381  }
382  data->ncompacts_ = compactor.Size() == -1
383      ? data->states_[data->nstates_]
384      : data->nstates_ * compactor.Size();
385  data->compacts_ = new CompactElement[data->ncompacts_];
386  // TODO: memory map this
387  size_t b = data->ncompacts_ * sizeof(CompactElement);
388  if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) &&
389      !AlignInput(strm, kFileAlign)) {
390    LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
391    delete data;
392    return 0;
393  }
394  strm.read(reinterpret_cast<char *>(data->compacts_), b);
395  if (!strm) {
396    LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
397    delete data;
398    return 0;
399  }
400  return data;
401}
402
403template<class E, class U>
404bool CompactFstData<E, U>::Write(ostream &strm,
405                                 const FstWriteOptions &opts) const {
406  if (states_) {
407    if (opts.align && !AlignOutput(strm, kFileAlign)) {
408      LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
409      return false;
410    }
411    strm.write(reinterpret_cast<char *>(states_),
412               (nstates_ + 1) * sizeof(Unsigned));
413  }
414  if (opts.align && !AlignOutput(strm, kFileAlign)) {
415    LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
416    return false;
417  }
418  strm.write(reinterpret_cast<char *>(compacts_),
419             ncompacts_ * sizeof(CompactElement));
420
421  strm.flush();
422  if (!strm) {
423    LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
424    return false;
425  }
426  return true;
427}
428
429template <class A, class C, class U> class CompactFst;
430template <class F, class G> void Cast(const F &, G *);
431
432// Implementation class for CompactFst, which contains CompactFstData
433// and Fst cache.
434template <class A, class C, class U>
435class CompactFstImpl : public CacheImpl<A> {
436 public:
437  using FstImpl<A>::SetType;
438  using FstImpl<A>::SetProperties;
439  using FstImpl<A>::Properties;
440  using FstImpl<A>::SetInputSymbols;
441  using FstImpl<A>::SetOutputSymbols;
442  using FstImpl<A>::WriteHeader;
443
444  using CacheImpl<A>::PushArc;
445  using CacheImpl<A>::HasArcs;
446  using CacheImpl<A>::HasFinal;
447  using CacheImpl<A>::HasStart;
448  using CacheImpl<A>::SetArcs;
449  using CacheImpl<A>::SetFinal;
450  using CacheImpl<A>::SetStart;
451
452  typedef A Arc;
453  typedef typename A::Weight Weight;
454  typedef typename A::StateId StateId;
455  typedef C Compactor;
456  typedef typename C::Element CompactElement;
457  typedef U Unsigned;
458
459  CompactFstImpl()
460      :  CacheImpl<A>(CompactFstOptions()),
461         compactor_(0),
462         own_compactor_(false),
463         data_(0) {
464    string type = "compact";
465    if (sizeof(U) != sizeof(uint32)) {
466      string size;
467      Int64ToStr(8 * sizeof(U), &size);
468      type += size;
469    }
470    type += "_";
471    type += C::Type();
472    SetType(type);
473    SetProperties(kNullProperties | kStaticProperties);
474  }
475
476  CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
477                 const CompactFstOptions &opts)
478      : CacheImpl<A>(opts),
479        compactor_(new C(compactor)),
480        own_compactor_(true),
481        data_(0) {
482    Init(fst);
483  }
484
485  CompactFstImpl(const Fst<Arc> &fst, C *compactor,
486                 const CompactFstOptions &opts)
487      : CacheImpl<A>(opts),
488        compactor_(compactor),
489        own_compactor_(false),
490        data_(0) {
491    Init(fst);
492  }
493
494  template <class Iterator>
495  CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
496                 const CompactFstOptions &opts)
497      : CacheImpl<A>(opts),
498        compactor_(new C(compactor)),
499        own_compactor_(true),
500        data_(0) {
501    Init(b, e);
502  }
503
504  template <class Iterator>
505  CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
506                 const CompactFstOptions &opts)
507      : CacheImpl<A>(opts),
508        compactor_(compactor),
509        own_compactor_(false),
510        data_(0) {
511    Init(b, e);
512  }
513
514  CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
515      : CacheImpl<A>(impl),
516        compactor_(new C(*impl.compactor_)),
517        own_compactor_(true),
518        data_(impl.data_) {
519    if (data_)
520      data_->IncrRefCount();
521    SetType(impl.Type());
522    SetProperties(impl.Properties());
523    SetInputSymbols(impl.InputSymbols());
524    SetOutputSymbols(impl.OutputSymbols());
525  }
526
527  ~CompactFstImpl(){
528    if (own_compactor_)
529      delete compactor_;
530    if (data_ && !data_->DecrRefCount())
531      delete data_;
532  }
533
534  StateId Start() {
535    if (!HasStart()) {
536      SetStart(data_->Start());
537    }
538    return CacheImpl<A>::Start();
539  }
540
541  Weight Final(StateId s) {
542    if (!HasFinal(s)) {
543      Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
544      if ((compactor_->Size() != -1) ||
545          (data_->States(s) != data_->States(s + 1)))
546        arc = ComputeArc(s,
547                         compactor_->Size() == -1
548                         ? data_->States(s)
549                         : s * compactor_->Size());
550      SetFinal(s, arc.ilabel == kNoLabel ? arc.weight : Weight::Zero());
551    }
552    return CacheImpl<A>::Final(s);
553  }
554
555  StateId NumStates() const {
556    if (Properties(kError)) return 0;
557    return data_->NumStates();
558  }
559
560  size_t NumArcs(StateId s) {
561    if (HasArcs(s))
562      return CacheImpl<A>::NumArcs(s);
563    Unsigned i, num_arcs;
564    if (compactor_->Size() == -1) {
565      i = data_->States(s);
566      num_arcs = data_->States(s + 1) - i;
567    } else {
568      i =  s * compactor_->Size();
569      num_arcs = compactor_->Size();
570    }
571    if (num_arcs > 0) {
572      const A &arc = ComputeArc(s, i, kArcILabelValue);
573      if (arc.ilabel == kNoStateId) {
574        --num_arcs;
575      }
576    }
577    return num_arcs;
578  }
579
580  size_t NumInputEpsilons(StateId s) {
581    if (!HasArcs(s) && !Properties(kILabelSorted))
582      Expand(s);
583    if (HasArcs(s))
584      return CacheImpl<A>::NumInputEpsilons(s);
585    return CountEpsilons(s, false);
586  }
587
588  size_t NumOutputEpsilons(StateId s) {
589    if (!HasArcs(s) && !Properties(kOLabelSorted))
590      Expand(s);
591    if (HasArcs(s))
592      return CacheImpl<A>::NumOutputEpsilons(s);
593    return CountEpsilons(s, true);
594  }
595
596  size_t CountEpsilons(StateId s, bool output_epsilons) {
597    size_t begin =  compactor_->Size() == -1 ?
598        data_->States(s) : s * compactor_->Size();
599    size_t end = compactor_->Size() == -1 ?
600        data_->States(s + 1) : (s + 1) * compactor_->Size();
601    size_t num_eps = 0;
602    for (size_t i = begin; i < end; ++i) {
603      const A &arc = ComputeArc(
604          s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
605      const typename A::Label &label =
606          (output_epsilons ? arc.olabel : arc.ilabel);
607      if (label == kNoLabel)
608        continue;
609      else if (label > 0)
610        break;
611      ++num_eps;
612    }
613    return num_eps;
614  }
615
616  static CompactFstImpl<A, C, U> *Read(istream &strm,
617                                       const FstReadOptions &opts) {
618    CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
619    FstHeader hdr;
620    if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
621      delete impl;
622      return 0;
623    }
624
625    // Ensures compatibility
626    if (hdr.Version() == kAlignedFileVersion)
627      hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
628
629    impl->compactor_ = C::Read(strm);
630    if (!impl->compactor_) {
631      delete impl;
632      return 0;
633    }
634    impl->own_compactor_ = true;
635    impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
636                                                          *impl->compactor_);
637    if (!impl->data_) {
638      delete impl;
639      return 0;
640    }
641    return impl;
642  }
643
644  bool Write(ostream &strm, const FstWriteOptions &opts) const {
645    FstHeader hdr;
646    hdr.SetStart(data_->Start());
647    hdr.SetNumStates(data_->NumStates());
648    hdr.SetNumArcs(data_->NumArcs());
649
650    // Ensures compatibility
651    int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
652    WriteHeader(strm, opts, file_version, &hdr);
653
654    compactor_->Write(strm);
655    return data_->Write(strm, opts);
656  }
657
658  // Provide information needed for generic state iterator
659  void InitStateIterator(StateIteratorData<A> *data) const {
660    data->base = 0;
661    data->nstates = data_->NumStates();
662  }
663
664  void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
665    if (!HasArcs(s))
666      Expand(s);
667    CacheImpl<A>::InitArcIterator(s, data);
668  }
669
670  Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
671    return compactor_->Expand(s, data_->Compacts(i), f);
672  }
673
674  void Expand(StateId s) {
675    size_t begin =  compactor_->Size() == -1 ?
676        data_->States(s) : s * compactor_->Size();
677    size_t end = compactor_->Size() == -1 ?
678        data_->States(s + 1) : (s + 1) * compactor_->Size();
679    for (size_t i = begin; i < end; ++i) {
680      const Arc &arc = ComputeArc(s, i);
681      if (arc.ilabel == kNoLabel) continue;
682      PushArc(s, arc);
683    }
684    SetArcs(s);
685  }
686
687  template <class Iterator>
688  void SetCompactElements(const Iterator &b, const Iterator &e) {
689    if (data_ && !data_->DecrRefCount())
690      delete data_;
691    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
692  }
693
694  C *GetCompactor() const { return compactor_; }
695  CompactFstData<CompactElement, U> *Data() const { return data_; }
696
697 protected:
698  template <class B, class D>
699  explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
700      : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
701        compactor_(new C(*impl.GetCompactor())),
702        own_compactor_(true),
703        data_(impl.Data()) {
704    if (data_)
705      data_->IncrRefCount();
706    SetType(impl.Type());
707    SetProperties(impl.Properties());
708    SetInputSymbols(impl.InputSymbols());
709    SetOutputSymbols(impl.OutputSymbols());
710  }
711
712 private:
713  void Init(const Fst<Arc> &fst) {
714    string type = "compact";
715    if (sizeof(U) != sizeof(uint32)) {
716      string size;
717      Int64ToStr(8 * sizeof(U), &size);
718      type += size;
719    }
720    type += "_";
721    type += compactor_->Type();
722    SetType(type);
723    SetInputSymbols(fst.InputSymbols());
724    SetOutputSymbols(fst.OutputSymbols());
725    data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
726    if (data_->Error())
727      SetProperties(kError, kError);
728    uint64 copy_properties = fst.Properties(kCopyProperties, true);
729    if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
730      FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
731      SetProperties(kError, kError);
732      return;
733    }
734    SetProperties(copy_properties | kStaticProperties);
735  }
736
737  template <class Iterator>
738  void Init(const Iterator &b, const Iterator &e) {
739    string type = "compact";
740    if (sizeof(U) != sizeof(uint32)) {
741      string size;
742      Int64ToStr(8 * sizeof(U), &size);
743      type += size;
744    }
745    type += "_";
746    type += compactor_->Type();
747    SetType(type);
748    SetProperties(kStaticProperties | compactor_->Properties());
749    data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
750    if (data_->Error())
751      SetProperties(kError, kError);
752  }
753
754  // Properties always true of this Fst class
755  static const uint64 kStaticProperties = kExpanded;
756  // Current unaligned file format version
757  static const int kFileVersion = 2;
758  // Current aligned file format version
759  static const int kAlignedFileVersion = 1;
760  // Minimum file format version supported
761  static const int kMinFileVersion = 1;
762
763  C *compactor_;
764  bool own_compactor_;
765  CompactFstData<CompactElement, U> *data_;
766};
767
768template <class A, class C, class U>
769const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
770template <class A, class C, class U>
771const int CompactFstImpl<A, C, U>::kFileVersion;
772template <class A, class C, class U>
773const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
774template <class A, class C, class U>
775const int CompactFstImpl<A, C, U>::kMinFileVersion;
776
777
778// CompactFst.  This class attaches interface to implementation and
779// handles reference counting, delegating most methods to
780// ImplToExpandedFst. The unsigned type U is used to represent indices
781// into the compact arc array (uint32 by default, declared in
782// fst-decl.h).
783template <class A, class C, class U>
784class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
785 public:
786  friend class StateIterator< CompactFst<A, C, U> >;
787  friend class ArcIterator< CompactFst<A, C, U> >;
788  template <class F, class G> void friend Cast(const F &, G *);
789
790  typedef A Arc;
791  typedef typename A::StateId StateId;
792  typedef CompactFstImpl<A, C, U> Impl;
793  typedef CacheState<A> State;
794  typedef U Unsigned;
795
796  CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
797
798  explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
799                      const CompactFstOptions &opts = CompactFstOptions())
800      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
801
802  CompactFst(const Fst<A> &fst, C *compactor,
803             const CompactFstOptions &opts = CompactFstOptions())
804      : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
805
806  // The following 2 constructors take as input two iterators delimiting
807  // a set of (already) compacted transitions, starting with the
808  // transitions out of the initial state. The format of the input
809  // differs for fixed out-degree and variable out-degree compactors.
810  //
811  // - For fixed out-degree compactors, the final weight (encoded as a
812  // compacted transition) needs to be given only for final
813  // states. All strings (compactor of size 1) will be assume to be
814  // terminated by a final state even when the final state is not
815  // implicitely given.
816  //
817  // - For variable out-degree compactors, the final weight (encoded
818  // as a compacted transition) needs to be given for all states and
819  // must appeared first in the list (for state s, final weight of s,
820  // followed by outgoing transitons in s).
821  //
822  // These 2 constructors allows the direct construction of a CompactFst
823  // without first creating a more memory hungry 'regular' FST. This
824  // is useful when memory usage is severely constrained.
825  template <class Iterator>
826  explicit CompactFst(const Iterator &begin, const Iterator &end,
827                      const C &compactor = C(),
828                      const CompactFstOptions &opts = CompactFstOptions())
829      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
830
831  template <class Iterator>
832  CompactFst(const Iterator &begin, const Iterator &end,
833             C *compactor, const CompactFstOptions &opts = CompactFstOptions())
834      : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
835
836  // See Fst<>::Copy() for doc.
837  CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
838      : ImplToExpandedFst<Impl>(fst, safe) {}
839
840  // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
841  virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
842    return new CompactFst<A, C, U>(*this, safe);
843  }
844
845  // Read a CompactFst from an input stream; return NULL on error
846  static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
847    Impl* impl = Impl::Read(strm, opts);
848    return impl ? new CompactFst<A, C, U>(impl) : 0;
849  }
850
851  // Read a CompactFst from a file; return NULL on error
852  // Empty filename reads from standard input
853  static CompactFst<A, C, U> *Read(const string &filename) {
854    Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
855    return impl ? new CompactFst<A, C, U>(impl) : 0;
856  }
857
858  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
859    return GetImpl()->Write(strm, opts);
860  }
861
862  virtual bool Write(const string &filename) const {
863    return Fst<A>::WriteFile(filename);
864  }
865
866  virtual void InitStateIterator(StateIteratorData<A> *data) const {
867    GetImpl()->InitStateIterator(data);
868  }
869
870  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
871    GetImpl()->InitArcIterator(s, data);
872  }
873
874  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
875    return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
876  }
877
878  template <class Iterator>
879  void SetCompactElements(const Iterator &b, const Iterator &e) {
880    GetImpl()->SetCompactElements(b, e);
881  }
882
883 private:
884  CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
885
886  // Makes visible to friends.
887  Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
888
889  void SetImpl(Impl *impl, bool own_impl = false) {
890    ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
891  }
892
893  void operator=(const CompactFst<A, C, U> &fst);  // disallow
894};
895
896
897// Specialization for CompactFst; see generic version in fst.h
898// for sample usage (but use the CompactFst type!). This version
899// should inline.
900template <class A, class C, class U>
901class StateIterator< CompactFst<A, C, U> > {
902 public:
903  typedef typename A::StateId StateId;
904
905  explicit StateIterator(const CompactFst<A, C, U> &fst)
906      : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
907
908  bool Done() const { return s_ >= nstates_; }
909
910  StateId Value() const { return s_; }
911
912  void Next() { ++s_; }
913
914  void Reset() { s_ = 0; }
915
916 private:
917  StateId nstates_;
918  StateId s_;
919
920  DISALLOW_COPY_AND_ASSIGN(StateIterator);
921};
922
923// Specialization for CompactFst.
924// Never caches, always iterates over the underlying compact elements.
925template <class A, class C, class U>
926class ArcIterator< CompactFst<A, C, U> > {
927 public:
928  typedef typename A::StateId StateId;
929  typedef typename C::Element CompactElement;
930
931  ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
932      : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
933        pos_(0), flags_(kArcValueFlags) {
934
935    const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
936    size_t offset;
937    if (compactor_->Size() == -1) {  // Variable out-degree compactor
938      offset = data->States(s);
939      num_arcs_ = data->States(s + 1) - offset;
940    } else {  // Fixed out-degree compactor
941      offset =  s * compactor_->Size();
942      num_arcs_ = compactor_->Size();
943    }
944    if (num_arcs_ > 0) {
945      compacts_ = &(data->Compacts(offset));
946      arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
947      if (arc_.ilabel == kNoStateId) {
948        ++compacts_;
949        --num_arcs_;
950      }
951    }
952  }
953
954  ~ArcIterator() {}
955
956  bool Done() const { return pos_ >= num_arcs_; }
957
958  const A& Value() const {
959    arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
960    return arc_;
961  }
962
963  void Next() { ++pos_; }
964
965  size_t Position() const { return pos_; }
966
967  void Reset() { pos_ = 0;  }
968
969  void Seek(size_t pos) { pos_ = pos; }
970
971  uint32 Flags() const { return flags_; }
972
973  void SetFlags(uint32 f, uint32 m) {
974    flags_ &= ~m;
975    flags_ |= (f & kArcValueFlags);
976  }
977
978 private:
979  C *compactor_;
980  StateId state_;
981  const CompactElement *compacts_;
982  size_t pos_;
983  size_t num_arcs_;
984  mutable A arc_;
985  uint32 flags_;
986
987  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
988};
989
990// // Specialization for CompactFst.
991// // This is an optionally caching arc iterator.
992// // TODO(allauzen): implements the kArcValueFlags, the current
993// // implementation only implements the kArcNoCache flag.
994// template <class A, class C, class U>
995// class ArcIterator< CompactFst<A, C, U> > {
996//  public:
997//   typedef typename A::StateId StateId;
998
999//   ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1000//       : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
1001//         flags_(kArcValueFlags) {
1002//     cache_data_.ref_count = 0;
1003
1004//     if (fst_.GetImpl()->HasArcs(state_)) {
1005//       fst_.GetImpl()->InitArcIterator(s, &cache_data_);
1006//       num_arcs_ = cache_data_.narcs;
1007//       return;
1008//     }
1009
1010//     const C *compactor = fst_.GetImpl()->GetCompactor();
1011//     const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
1012//     if (compactor->Size() == -1) {  // Variable out-degree compactor
1013//       offset_ = data->States(s);
1014//       num_arcs_ = data->States(s + 1) - offset_;
1015//     } else {  // Fixed out-degree compactor
1016//       offset_ =  s * compactor->Size();
1017//       num_arcs_ = compactor->Size();
1018//     }
1019//     if (num_arcs_ > 0) {
1020//       const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
1021//       if (arc.ilabel == kNoStateId) {
1022//         ++offset_;
1023//         --num_arcs_;
1024//       }
1025//     }
1026//   }
1027
1028
1029//   ~ArcIterator() {
1030//     if (cache_data_.ref_count)
1031//       --(*cache_data_.ref_count);
1032//   }
1033
1034//   bool Done() const { return pos_ >= num_arcs_; }
1035
1036//   const A& Value() const {
1037//     if (cache_data_.ref_count == 0) {
1038//       if (flags_ & kArcNoCache) {
1039//         arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
1040//         return arc_;
1041//       } else {
1042//         fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1043//       }
1044//     }
1045//     return cache_data_.arcs[pos_];
1046//   }
1047
1048//   void Next() { ++pos_; }
1049
1050//   size_t Position() const { return pos_; }
1051
1052//   void Reset() { pos_ = 0;  }
1053
1054//   void Seek(size_t pos) { pos_ = pos; }
1055
1056//   uint32 Flags() const { return flags_; }
1057
1058//   void SetFlags(uint32 f, uint32 m) {
1059//     flags_ &= ~m;
1060//     flags_ |= f;
1061
1062//     if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
1063//       fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1064//   }
1065
1066//  private:
1067//   mutable const CompactFst<A, C, U> &fst_;
1068//   StateId state_;
1069//   size_t pos_;
1070//   size_t num_arcs_;
1071//   size_t offset_;
1072//   uint32 flags_;
1073//   mutable A arc_;
1074//   mutable ArcIteratorData<A> cache_data_;
1075
1076//   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1077// };
1078
1079
1080//
1081// Utility Compactors
1082//
1083
1084// Compactor for unweighted string FSTs
1085template <class A>
1086class StringCompactor {
1087 public:
1088  typedef A Arc;
1089  typedef typename A::Label Element;
1090  typedef typename A::Label Label;
1091  typedef typename A::StateId StateId;
1092  typedef typename A::Weight Weight;
1093
1094  Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
1095
1096  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1097    return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
1098  }
1099
1100  ssize_t Size() const { return 1; }
1101
1102  uint64 Properties() const {
1103    return kString | kAcceptor | kUnweighted;
1104  }
1105
1106  bool Compatible(const Fst<A> &fst) const {
1107    uint64 props = Properties();
1108    return fst.Properties(props, true) == props;
1109  }
1110
1111  static const string &Type() {
1112    static const string type = "string";
1113    return type;
1114  }
1115
1116  bool Write(ostream &strm) const { return true; }
1117
1118  static StringCompactor *Read(istream &strm) {
1119    return new StringCompactor;
1120  }
1121};
1122
1123
1124// Compactor for weighted string FSTs
1125template <class A>
1126class WeightedStringCompactor {
1127 public:
1128  typedef A Arc;
1129  typedef typename A::Label Label;
1130  typedef typename A::StateId StateId;
1131  typedef typename A::Weight Weight;
1132  typedef pair<Label, Weight> Element;
1133
1134  Element Compact(StateId s, const A &arc) const {
1135    return make_pair(arc.ilabel, arc.weight);
1136  }
1137
1138  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1139    return Arc(p.first, p.first, p.second,
1140               p.first != kNoLabel ? s + 1 : kNoStateId);
1141  }
1142
1143  ssize_t Size() const { return 1;}
1144
1145  uint64 Properties() const {
1146    return kString | kAcceptor;
1147  }
1148
1149  bool Compatible(const Fst<A> &fst) const {
1150    uint64 props = Properties();
1151    return fst.Properties(props, true) == props;
1152  }
1153
1154  static const string &Type() {
1155    static const string type = "weighted_string";
1156    return type;
1157  }
1158
1159  bool Write(ostream &strm) const { return true; }
1160
1161  static WeightedStringCompactor *Read(istream &strm) {
1162    return new WeightedStringCompactor;
1163  }
1164};
1165
1166
1167// Compactor for unweighted acceptor FSTs
1168template <class A>
1169class UnweightedAcceptorCompactor {
1170 public:
1171  typedef A Arc;
1172  typedef typename A::Label Label;
1173  typedef typename A::StateId StateId;
1174  typedef typename A::Weight Weight;
1175  typedef pair<Label, StateId> Element;
1176
1177  Element Compact(StateId s, const A &arc) const {
1178    return make_pair(arc.ilabel, arc.nextstate);
1179  }
1180
1181  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1182    return Arc(p.first, p.first, Weight::One(), p.second);
1183  }
1184
1185  ssize_t Size() const { return -1;}
1186
1187  uint64 Properties() const {
1188    return kAcceptor | kUnweighted;
1189  }
1190
1191  bool Compatible(const Fst<A> &fst) const {
1192    uint64 props = Properties();
1193    return fst.Properties(props, true) == props;
1194  }
1195
1196  static const string &Type() {
1197    static const string type = "unweighted_acceptor";
1198    return type;
1199  }
1200
1201  bool Write(ostream &strm) const { return true; }
1202
1203  static UnweightedAcceptorCompactor *Read(istream &istrm) {
1204    return new UnweightedAcceptorCompactor;
1205  }
1206};
1207
1208
1209// Compactor for weighted acceptor FSTs
1210template <class A>
1211class AcceptorCompactor {
1212 public:
1213  typedef A Arc;
1214  typedef typename A::Label Label;
1215  typedef typename A::StateId StateId;
1216  typedef typename A::Weight Weight;
1217  typedef pair< pair<Label, Weight>, StateId > Element;
1218
1219  Element Compact(StateId s, const A &arc) const {
1220    return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
1221  }
1222
1223  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1224    return Arc(p.first.first, p.first.first, p.first.second, p.second);
1225  }
1226
1227  ssize_t Size() const { return -1;}
1228
1229  uint64 Properties() const {
1230    return kAcceptor;
1231  }
1232
1233  bool Compatible(const Fst<A> &fst) const {
1234    uint64 props = Properties();
1235    return fst.Properties(props, true) == props;
1236  }
1237
1238  static const string &Type() {
1239    static const string type = "acceptor";
1240    return type;
1241  }
1242
1243  bool Write(ostream &strm) const { return true; }
1244
1245  static AcceptorCompactor *Read(istream &strm) {
1246    return new AcceptorCompactor;
1247  }
1248};
1249
1250
1251// Compactor for unweighted FSTs
1252template <class A>
1253class UnweightedCompactor {
1254 public:
1255  typedef A Arc;
1256  typedef typename A::Label Label;
1257  typedef typename A::StateId StateId;
1258  typedef typename A::Weight Weight;
1259  typedef pair< pair<Label, Label>, StateId > Element;
1260
1261  Element Compact(StateId s, const A &arc) const {
1262    return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
1263  }
1264
1265  Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1266    return Arc(p.first.first, p.first.second, Weight::One(), p.second);
1267  }
1268
1269  ssize_t Size() const { return -1; }
1270
1271  uint64 Properties() const {
1272    return kUnweighted;
1273  }
1274
1275  bool Compatible(const Fst<A> &fst) const {
1276    uint64 props = Properties();
1277    return fst.Properties(props, true) == props;
1278  }
1279
1280  static const string &Type() {
1281    static const string type = "unweighted";
1282    return type;
1283  }
1284
1285  bool Write(ostream &strm) const { return true; }
1286
1287  static UnweightedCompactor *Read(istream &strm) {
1288    return new UnweightedCompactor;
1289  }
1290};
1291
1292
1293// Uselful aliases when using StdArc
1294typedef CompactFst< StdArc, StringCompactor<StdArc> >
1295StdCompactStringFst;
1296typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
1297StdCompactWeightedStringFst;
1298typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
1299StdCompactAcceptorFst;
1300typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
1301StdCompactUnweightedFst;
1302typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
1303StdCompactUnweightedAcceptorFst;
1304
1305}  // namespace fst
1306
1307#endif // FST_LIB_COMPACT_FST_H__
1308