arc-map.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// arc-map.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Class to map over/transform arcs e.g., change semirings or
20// implement project/invert. Consider using when operation does
21// not change the number of arcs (except possibly superfinal arcs).
22
23#ifndef FST_LIB_ARC_MAP_H__
24#define FST_LIB_ARC_MAP_H__
25
26#include <unordered_map>
27using std::tr1::unordered_map;
28using std::tr1::unordered_multimap;
29#include <string>
30#include <utility>
31using std::pair; using std::make_pair;
32
33#include <fst/cache.h>
34#include <fst/mutable-fst.h>
35
36
37namespace fst {
38
39// This determines how final weights are mapped.
40enum MapFinalAction {
41  // A final weight is mapped into a final weight. An error
42  // is raised if this is not possible.
43  MAP_NO_SUPERFINAL,
44
45  // A final weight is mapped to an arc to the superfinal state
46  // when the result cannot be represented as a final weight.
47  // The superfinal state will be added only if it is needed.
48  MAP_ALLOW_SUPERFINAL,
49
50  // A final weight is mapped to an arc to the superfinal state
51  // unless the result can be represented as a final weight of weight
52  // Zero(). The superfinal state is always added (if the input is
53  // not the empty Fst).
54  MAP_REQUIRE_SUPERFINAL
55};
56
57// This determines how symbol tables are mapped.
58enum MapSymbolsAction {
59  // Symbols should be cleared in the result by the map.
60  MAP_CLEAR_SYMBOLS,
61
62  // Symbols should be copied from the input FST by the map.
63  MAP_COPY_SYMBOLS,
64
65  // Symbols should not be modified in the result by the map itself.
66  // (They may set by the mapper).
67  MAP_NOOP_SYMBOLS
68};
69
70// ArcMapper Interface - class determinies how arcs and final weights
71// are mapped. Useful for implementing operations that do not change
72// the number of arcs (expect possibly superfinal arcs).
73//
74// class ArcMapper {
75//  public:
76//   typedef A FromArc;
77//   typedef B ToArc;
78//
79//   // Maps an arc type A to arc type B.
80//   B operator()(const A &arc);
81//   // Specifies final action the mapper requires (see above).
82//   // The mapper will be passed final weights as arcs of the
83//   // form A(0, 0, weight, kNoStateId).
84//   MapFinalAction FinalAction() const;
85//   // Specifies input symbol table action the mapper requires (see above).
86//   MapSymbolsAction InputSymbolsAction() const;
87//   // Specifies output symbol table action the mapper requires (see above).
88//   MapSymbolsAction OutputSymbolsAction() const;
89//   // This specifies the known properties of an Fst mapped by this
90//   // mapper. It takes as argument the input Fst's known properties.
91//   uint64 Properties(uint64 props) const;
92// };
93//
94// The ArcMap functions and classes below will use the FinalAction()
95// method of the mapper to determine how to treat final weights,
96// e.g. whether to add a superfinal state. They will use the Properties()
97// method to set the result Fst properties.
98//
99// We include a various map versions below. One dimension of
100// variation is whether the mapping mutates its input, writes to a
101// new result Fst, or is an on-the-fly Fst. Another dimension is how
102// we pass the mapper. We allow passing the mapper by pointer
103// for cases that we need to change the state of the user's mapper.
104// This is the case with the encode mapper, which is reused during
105// decoding. We also include map versions that pass the mapper
106// by value or const reference when this suffices.
107
108
109// Maps an arc type A using a mapper function object C, passed
110// by pointer.  This version modifies its Fst input.
111template<class A, class C>
112void ArcMap(MutableFst<A> *fst, C* mapper) {
113  typedef typename A::StateId StateId;
114  typedef typename A::Weight Weight;
115
116  if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
117    fst->SetInputSymbols(0);
118
119  if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
120    fst->SetOutputSymbols(0);
121
122  if (fst->Start() == kNoStateId)
123    return;
124
125  uint64 props = fst->Properties(kFstProperties, false);
126
127  MapFinalAction final_action = mapper->FinalAction();
128  StateId superfinal = kNoStateId;
129  if (final_action == MAP_REQUIRE_SUPERFINAL) {
130    superfinal = fst->AddState();
131    fst->SetFinal(superfinal, Weight::One());
132  }
133
134  for (StateId s = 0; s < fst->NumStates(); ++s) {
135    for (MutableArcIterator< MutableFst<A> > aiter(fst, s);
136         !aiter.Done(); aiter.Next()) {
137      const A &arc = aiter.Value();
138      aiter.SetValue((*mapper)(arc));
139    }
140
141    switch (final_action) {
142      case MAP_NO_SUPERFINAL:
143      default: {
144        A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
145        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
146          FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
147          fst->SetProperties(kError, kError);
148        }
149
150        fst->SetFinal(s, final_arc.weight);
151        break;
152      }
153      case MAP_ALLOW_SUPERFINAL: {
154        if (s != superfinal) {
155          A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
156          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
157            // Add a superfinal state if not already done.
158            if (superfinal == kNoStateId) {
159              superfinal = fst->AddState();
160              fst->SetFinal(superfinal, Weight::One());
161            }
162            final_arc.nextstate = superfinal;
163            fst->AddArc(s, final_arc);
164            fst->SetFinal(s, Weight::Zero());
165          } else {
166            fst->SetFinal(s, final_arc.weight);
167          }
168          break;
169        }
170      }
171      case MAP_REQUIRE_SUPERFINAL: {
172        if (s != superfinal) {
173          A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId));
174          if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
175              final_arc.weight != Weight::Zero())
176            fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel,
177                             final_arc.weight, superfinal));
178            fst->SetFinal(s, Weight::Zero());
179        }
180        break;
181      }
182    }
183  }
184  fst->SetProperties(mapper->Properties(props), kFstProperties);
185}
186
187
188// Maps an arc type A using a mapper function object C, passed
189// by value.  This version modifies its Fst input.
190template<class A, class C>
191void ArcMap(MutableFst<A> *fst, C mapper) {
192  ArcMap(fst, &mapper);
193}
194
195
196// Maps an arc type A to an arc type B using mapper function
197// object C, passed by pointer. This version writes the mapped
198// input Fst to an output MutableFst.
199template<class A, class B, class C>
200void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) {
201  typedef typename A::StateId StateId;
202  typedef typename A::Weight Weight;
203
204  ofst->DeleteStates();
205
206  if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS)
207    ofst->SetInputSymbols(ifst.InputSymbols());
208  else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
209    ofst->SetInputSymbols(0);
210
211  if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
212    ofst->SetOutputSymbols(ifst.OutputSymbols());
213  else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
214    ofst->SetOutputSymbols(0);
215
216  uint64 iprops = ifst.Properties(kCopyProperties, false);
217
218  if (ifst.Start() == kNoStateId) {
219    if (iprops & kError) ofst->SetProperties(kError, kError);
220    return;
221  }
222
223  MapFinalAction final_action = mapper->FinalAction();
224  if (ifst.Properties(kExpanded, false)) {
225    ofst->ReserveStates(CountStates(ifst) +
226                        final_action == MAP_NO_SUPERFINAL ? 0 : 1);
227  }
228
229  // Add all states.
230  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next())
231    ofst->AddState();
232
233  StateId superfinal = kNoStateId;
234  if (final_action == MAP_REQUIRE_SUPERFINAL) {
235    superfinal = ofst->AddState();
236    ofst->SetFinal(superfinal, B::Weight::One());
237  }
238  for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) {
239    StateId s = siter.Value();
240    if (s == ifst.Start())
241      ofst->SetStart(s);
242
243    ofst->ReserveArcs(s, ifst.NumArcs(s));
244    for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next())
245      ofst->AddArc(s, (*mapper)(aiter.Value()));
246
247    switch (final_action) {
248      case MAP_NO_SUPERFINAL:
249      default: {
250        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
251        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
252          FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc";
253          ofst->SetProperties(kError, kError);
254        }
255        ofst->SetFinal(s, final_arc.weight);
256        break;
257      }
258      case MAP_ALLOW_SUPERFINAL: {
259        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
260        if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
261            // Add a superfinal state if not already done.
262          if (superfinal == kNoStateId) {
263            superfinal = ofst->AddState();
264            ofst->SetFinal(superfinal, B::Weight::One());
265          }
266          final_arc.nextstate = superfinal;
267          ofst->AddArc(s, final_arc);
268          ofst->SetFinal(s, B::Weight::Zero());
269        } else {
270          ofst->SetFinal(s, final_arc.weight);
271        }
272        break;
273      }
274      case MAP_REQUIRE_SUPERFINAL: {
275        B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId));
276        if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
277            final_arc.weight != B::Weight::Zero())
278          ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel,
279                            final_arc.weight, superfinal));
280        ofst->SetFinal(s, B::Weight::Zero());
281        break;
282      }
283    }
284  }
285  uint64 oprops = ofst->Properties(kFstProperties, false);
286  ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties);
287}
288
289// Maps an arc type A to an arc type B using mapper function
290// object C, passed by value. This version writes the mapped input
291// Fst to an output MutableFst.
292template<class A, class B, class C>
293void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) {
294  ArcMap(ifst, ofst, &mapper);
295}
296
297
298struct ArcMapFstOptions : public CacheOptions {
299  // ArcMapFst default caching behaviour is to do no caching. Most
300  // mappers are cheap and therefore we save memory by not doing
301  // caching.
302  ArcMapFstOptions() : CacheOptions(true, 0) {}
303  ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {}
304};
305
306
307template <class A, class B, class C> class ArcMapFst;
308
309// Implementation of delayed ArcMapFst.
310template <class A, class B, class C>
311class ArcMapFstImpl : public CacheImpl<B> {
312 public:
313  using FstImpl<B>::SetType;
314  using FstImpl<B>::SetProperties;
315  using FstImpl<B>::SetInputSymbols;
316  using FstImpl<B>::SetOutputSymbols;
317
318  using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;
319
320  using CacheImpl<B>::PushArc;
321  using CacheImpl<B>::HasArcs;
322  using CacheImpl<B>::HasFinal;
323  using CacheImpl<B>::HasStart;
324  using CacheImpl<B>::SetArcs;
325  using CacheImpl<B>::SetFinal;
326  using CacheImpl<B>::SetStart;
327
328  friend class StateIterator< ArcMapFst<A, B, C> >;
329
330  typedef B Arc;
331  typedef typename B::Weight Weight;
332  typedef typename B::StateId StateId;
333
334  ArcMapFstImpl(const Fst<A> &fst, const C &mapper,
335                 const ArcMapFstOptions& opts)
336      : CacheImpl<B>(opts),
337        fst_(fst.Copy()),
338        mapper_(new C(mapper)),
339        own_mapper_(true),
340        superfinal_(kNoStateId),
341        nstates_(0) {
342    Init();
343  }
344
345  ArcMapFstImpl(const Fst<A> &fst, C *mapper,
346                 const ArcMapFstOptions& opts)
347      : CacheImpl<B>(opts),
348        fst_(fst.Copy()),
349        mapper_(mapper),
350        own_mapper_(false),
351        superfinal_(kNoStateId),
352        nstates_(0) {
353    Init();
354  }
355
356  ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl)
357      : CacheImpl<B>(impl),
358        fst_(impl.fst_->Copy(true)),
359        mapper_(new C(*impl.mapper_)),
360        own_mapper_(true),
361        superfinal_(kNoStateId),
362        nstates_(0) {
363    Init();
364  }
365
366  ~ArcMapFstImpl() {
367    delete fst_;
368    if (own_mapper_) delete mapper_;
369  }
370
371  StateId Start() {
372    if (!HasStart())
373      SetStart(FindOState(fst_->Start()));
374    return CacheImpl<B>::Start();
375  }
376
377  Weight Final(StateId s) {
378    if (!HasFinal(s)) {
379      switch (final_action_) {
380        case MAP_NO_SUPERFINAL:
381        default: {
382          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
383                                        kNoStateId));
384          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
385            FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc";
386            SetProperties(kError, kError);
387          }
388          SetFinal(s, final_arc.weight);
389          break;
390        }
391        case MAP_ALLOW_SUPERFINAL: {
392          if (s == superfinal_) {
393            SetFinal(s, Weight::One());
394          } else {
395            B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
396                                          kNoStateId));
397            if (final_arc.ilabel == 0 && final_arc.olabel == 0)
398              SetFinal(s, final_arc.weight);
399            else
400              SetFinal(s, Weight::Zero());
401          }
402          break;
403        }
404        case MAP_REQUIRE_SUPERFINAL: {
405          SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero());
406          break;
407        }
408      }
409    }
410    return CacheImpl<B>::Final(s);
411  }
412
413  size_t NumArcs(StateId s) {
414    if (!HasArcs(s))
415      Expand(s);
416    return CacheImpl<B>::NumArcs(s);
417  }
418
419  size_t NumInputEpsilons(StateId s) {
420    if (!HasArcs(s))
421      Expand(s);
422    return CacheImpl<B>::NumInputEpsilons(s);
423  }
424
425  size_t NumOutputEpsilons(StateId s) {
426    if (!HasArcs(s))
427      Expand(s);
428    return CacheImpl<B>::NumOutputEpsilons(s);
429  }
430
431  uint64 Properties() const { return Properties(kFstProperties); }
432
433  // Set error if found; return FST impl properties.
434  uint64 Properties(uint64 mask) const {
435    if ((mask & kError) && (fst_->Properties(kError, false) ||
436                            (mapper_->Properties(0) & kError)))
437      SetProperties(kError, kError);
438    return FstImpl<Arc>::Properties(mask);
439  }
440
441  void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
442    if (!HasArcs(s))
443      Expand(s);
444    CacheImpl<B>::InitArcIterator(s, data);
445  }
446
447  void Expand(StateId s) {
448    // Add exiting arcs.
449    if (s == superfinal_) { SetArcs(s); return; }
450
451    for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s));
452         !aiter.Done(); aiter.Next()) {
453      A aarc(aiter.Value());
454      aarc.nextstate = FindOState(aarc.nextstate);
455      const B& barc = (*mapper_)(aarc);
456      PushArc(s, barc);
457    }
458
459    // Check for superfinal arcs.
460    if (!HasFinal(s) || Final(s) == Weight::Zero())
461      switch (final_action_) {
462        case MAP_NO_SUPERFINAL:
463        default:
464          break;
465        case MAP_ALLOW_SUPERFINAL: {
466          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
467                                        kNoStateId));
468          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
469            if (superfinal_ == kNoStateId)
470              superfinal_ = nstates_++;
471            final_arc.nextstate = superfinal_;
472            PushArc(s, final_arc);
473          }
474          break;
475        }
476      case MAP_REQUIRE_SUPERFINAL: {
477        B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
478                                      kNoStateId));
479        if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
480            final_arc.weight != B::Weight::Zero())
481          PushArc(s, B(final_arc.ilabel, final_arc.olabel,
482                      final_arc.weight, superfinal_));
483        break;
484      }
485    }
486    SetArcs(s);
487  }
488
489 private:
490  void Init() {
491    SetType("map");
492
493    if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
494      SetInputSymbols(fst_->InputSymbols());
495    else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
496      SetInputSymbols(0);
497
498    if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
499      SetOutputSymbols(fst_->OutputSymbols());
500    else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
501      SetOutputSymbols(0);
502
503    if (fst_->Start() == kNoStateId) {
504      final_action_ = MAP_NO_SUPERFINAL;
505      SetProperties(kNullProperties);
506    } else {
507      final_action_ = mapper_->FinalAction();
508      uint64 props = fst_->Properties(kCopyProperties, false);
509      SetProperties(mapper_->Properties(props));
510      if (final_action_ == MAP_REQUIRE_SUPERFINAL)
511        superfinal_ = 0;
512    }
513  }
514
515  // Maps from output state to input state.
516  StateId FindIState(StateId s) {
517    if (superfinal_ == kNoStateId || s < superfinal_)
518      return s;
519    else
520      return s - 1;
521  }
522
523  // Maps from input state to output state.
524  StateId FindOState(StateId is) {
525    StateId os;
526    if (superfinal_ == kNoStateId || is < superfinal_)
527      os = is;
528    else
529      os = is + 1;
530
531    if (os >= nstates_)
532      nstates_ = os + 1;
533
534    return os;
535  }
536
537
538  const Fst<A> *fst_;
539  C*   mapper_;
540  bool own_mapper_;
541  MapFinalAction final_action_;
542
543  StateId superfinal_;
544  StateId nstates_;
545
546  void operator=(const ArcMapFstImpl<A, B, C> &);  // disallow
547};
548
549
550// Maps an arc type A to an arc type B using Mapper function object
551// C. This version is a delayed Fst.
552template <class A, class B, class C>
553class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > {
554 public:
555  friend class ArcIterator< ArcMapFst<A, B, C> >;
556  friend class StateIterator< ArcMapFst<A, B, C> >;
557
558  typedef B Arc;
559  typedef typename B::Weight Weight;
560  typedef typename B::StateId StateId;
561  typedef CacheState<B> State;
562  typedef ArcMapFstImpl<A, B, C> Impl;
563
564  ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts)
565      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
566
567  ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts)
568      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}
569
570  ArcMapFst(const Fst<A> &fst, const C &mapper)
571      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}
572
573  ArcMapFst(const Fst<A> &fst, C* mapper)
574      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}
575
576  // See Fst<>::Copy() for doc.
577  ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
578    : ImplToFst<Impl>(fst, safe) {}
579
580  // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc.
581  virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const {
582    return new ArcMapFst<A, B, C>(*this, safe);
583  }
584
585  virtual inline void InitStateIterator(StateIteratorData<B> *data) const;
586
587  virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
588    GetImpl()->InitArcIterator(s, data);
589  }
590
591 private:
592  // Makes visible to friends.
593  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
594
595  void operator=(const ArcMapFst<A, B, C> &fst);  // disallow
596};
597
598
599// Specialization for ArcMapFst.
600template<class A, class B, class C>
601class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> {
602 public:
603  typedef typename B::StateId StateId;
604
605  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
606      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0),
607        superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL)
608  { CheckSuperfinal(); }
609
610  bool Done() const { return siter_.Done() && !superfinal_; }
611
612  StateId Value() const { return s_; }
613
614  void Next() {
615    ++s_;
616    if (!siter_.Done()) {
617      siter_.Next();
618      CheckSuperfinal();
619    }
620    else if (superfinal_)
621      superfinal_ = false;
622  }
623
624  void Reset() {
625    s_ = 0;
626    siter_.Reset();
627    superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL;
628    CheckSuperfinal();
629  }
630
631 private:
632  // This allows base-class virtual access to non-virtual derived-
633  // class members of the same name. It makes the derived class more
634  // efficient to use but unsafe to further derive.
635  bool Done_() const { return Done(); }
636  StateId Value_() const { return Value(); }
637  void Next_() { Next(); }
638  void Reset_() { Reset(); }
639
640  void CheckSuperfinal() {
641    if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_)
642      return;
643    if (!siter_.Done()) {
644      B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_),
645                                           kNoStateId));
646      if (final_arc.ilabel != 0 || final_arc.olabel != 0)
647        superfinal_ = true;
648    }
649  }
650
651  const ArcMapFstImpl<A, B, C> *impl_;
652  StateIterator< Fst<A> > siter_;
653  StateId s_;
654  bool superfinal_;    // true if there is a superfinal state and not done
655
656  DISALLOW_COPY_AND_ASSIGN(StateIterator);
657};
658
659
660// Specialization for ArcMapFst.
661template <class A, class B, class C>
662class ArcIterator< ArcMapFst<A, B, C> >
663    : public CacheArcIterator< ArcMapFst<A, B, C> > {
664 public:
665  typedef typename A::StateId StateId;
666
667  ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s)
668      : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) {
669    if (!fst.GetImpl()->HasArcs(s))
670      fst.GetImpl()->Expand(s);
671  }
672
673 private:
674  DISALLOW_COPY_AND_ASSIGN(ArcIterator);
675};
676
677template <class A, class B, class C> inline
678void ArcMapFst<A, B, C>::InitStateIterator(StateIteratorData<B> *data)
679    const {
680  data->base = new StateIterator< ArcMapFst<A, B, C> >(*this);
681}
682
683
684//
685// Utility Mappers
686//
687
688// Mapper that returns its input.
689template <class A>
690struct IdentityArcMapper {
691  typedef A FromArc;
692  typedef A ToArc;
693
694  A operator()(const A &arc) const { return arc; }
695
696  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
697
698  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
699
700  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
701
702  uint64 Properties(uint64 props) const { return props; }
703};
704
705
706// Mapper that returns its input with final states redirected to
707// a single super-final state.
708template <class A>
709struct SuperFinalMapper {
710  typedef A FromArc;
711  typedef A ToArc;
712
713  A operator()(const A &arc) const { return arc; }
714
715  MapFinalAction FinalAction() const { return MAP_REQUIRE_SUPERFINAL; }
716
717  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
718
719  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
720
721  uint64 Properties(uint64 props) const {
722    return props & kAddSuperFinalProperties;
723  }
724};
725
726
727// Mapper that leaves labels and nextstate unchanged and constructs a new weight
728// from the underlying value of the arc weight.  Requires that there is a
729// WeightConvert class specialization that converts the weights.
730template <class A, class B>
731class WeightConvertMapper {
732 public:
733  typedef A FromArc;
734  typedef B ToArc;
735  typedef typename FromArc::Weight FromWeight;
736  typedef typename ToArc::Weight ToWeight;
737
738  ToArc operator()(const FromArc &arc) const {
739    return ToArc(arc.ilabel, arc.olabel,
740                 convert_weight_(arc.weight), arc.nextstate);
741  }
742
743  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
744
745  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
746
747  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
748
749  uint64 Properties(uint64 props) const { return props; }
750
751 private:
752  WeightConvert<FromWeight, ToWeight> convert_weight_;
753};
754
755// Non-precision-changing weight conversions.
756// Consider using more efficient Cast (fst.h) instead.
757typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper;
758typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper;
759
760// Precision-changing weight conversions.
761typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper;
762typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper;
763typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper;
764typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper;
765
766// Mapper from A to GallicArc<A>.
767template <class A, StringType S = STRING_LEFT>
768struct ToGallicMapper {
769  typedef A FromArc;
770  typedef GallicArc<A, S> ToArc;
771
772  typedef StringWeight<typename A::Label, S> SW;
773  typedef typename A::Weight AW;
774  typedef typename GallicArc<A, S>::Weight GW;
775
776  ToArc operator()(const A &arc) const {
777    // 'Super-final' arc.
778    if (arc.nextstate == kNoStateId && arc.weight != AW::Zero())
779      return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId);
780    // 'Super-non-final' arc.
781    else if (arc.nextstate == kNoStateId)
782      return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId);
783    // Epsilon label.
784    else if (arc.olabel == 0)
785      return ToArc(arc.ilabel, arc.ilabel,
786                   GW(SW::One(), arc.weight), arc.nextstate);
787    // Regular label.
788    else
789      return ToArc(arc.ilabel, arc.ilabel,
790                   GW(SW(arc.olabel), arc.weight), arc.nextstate);
791  }
792
793  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
794
795  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
796
797  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
798
799  uint64 Properties(uint64 props) const {
800    return ProjectProperties(props, true) & kWeightInvariantProperties;
801  }
802};
803
804
805// Mapper from GallicArc<A> to A.
806template <class A, StringType S = STRING_LEFT>
807struct FromGallicMapper {
808  typedef GallicArc<A, S> FromArc;
809  typedef A ToArc;
810
811  typedef typename A::Label Label;
812  typedef StringWeight<Label, S> SW;
813  typedef typename A::Weight AW;
814  typedef typename GallicArc<A, S>::Weight GW;
815
816  FromGallicMapper(Label superfinal_label = 0)
817      : superfinal_label_(superfinal_label), error_(false) {}
818
819  A operator()(const FromArc &arc) const {
820    // 'Super-non-final' arc.
821    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
822      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
823
824    SW w1 = arc.weight.Value1();
825    AW w2 = arc.weight.Value2();
826    StringWeightIterator<Label, S> iter1(w1);
827
828    Label l = w1.Size() == 1 ? iter1.Value() : 0;
829
830    if (l == kStringInfinity || l == kStringBad ||
831        arc.ilabel != arc.olabel || w1.Size() > 1) {
832      FSTERROR() << "FromGallicMapper: unrepesentable weight";
833      error_ = true;
834    }
835
836    if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId)
837      return A(superfinal_label_, l, w2, arc.nextstate);
838    else
839      return A(arc.ilabel, l, w2, arc.nextstate);
840  }
841
842  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
843
844  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
845
846  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}
847
848  uint64 Properties(uint64 inprops) const {
849    uint64 outprops = inprops & kOLabelInvariantProperties &
850        kWeightInvariantProperties & kAddSuperFinalProperties;
851    if (error_)
852      outprops |= kError;
853    return outprops;
854  }
855
856 private:
857  Label superfinal_label_;
858  mutable bool error_;
859};
860
861
862// Mapper from GallicArc<A> to A.
863template <class A, StringType S = STRING_LEFT>
864struct GallicToNewSymbolsMapper {
865  typedef GallicArc<A, S> FromArc;
866  typedef A ToArc;
867
868  typedef typename A::StateId StateId;
869  typedef typename A::Label Label;
870  typedef StringWeight<Label, S> SW;
871  typedef typename A::Weight AW;
872  typedef typename GallicArc<A, S>::Weight GW;
873
874  GallicToNewSymbolsMapper(MutableFst<ToArc> *fst)
875      : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()),
876        isymbols_(0), error_(false) {
877    fst_->DeleteStates();
878    state_ = fst_->AddState();
879    fst_->SetStart(state_);
880    fst_->SetFinal(state_, AW::One());
881    if (osymbols_) {
882      string name = osymbols_->Name() + "_from_gallic";
883      fst_->SetInputSymbols(new SymbolTable(name));
884      isymbols_ = fst_->MutableInputSymbols();
885      isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0);
886    } else {
887      fst_->SetInputSymbols(0);
888    }
889  }
890
891  A operator()(const FromArc &arc) {
892    // 'Super-non-final' arc.
893    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
894      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);
895
896    SW w1 = arc.weight.Value1();
897    AW w2 = arc.weight.Value2();
898    Label l;
899
900    if (w1.Size() == 0) {
901      l = 0;
902    } else {
903      typename Map::iterator miter = map_.find(w1);
904      if (miter != map_.end()) {
905        l = (*miter).second;
906      } else {
907        l = ++lmax_;
908        map_.insert(pair<const SW, Label>(w1, l));
909        StringWeightIterator<Label, S> iter1(w1);
910        StateId n;
911        string s;
912        for(size_t i = 0, p = state_;
913            i < w1.Size();
914            ++i, iter1.Next(), p = n) {
915          n = i == w1.Size() - 1 ? state_ : fst_->AddState();
916          fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n));
917          if (isymbols_) {
918            if (i) s = s + "_";
919            s = s + osymbols_->Find(iter1.Value());
920          }
921        }
922        if (isymbols_)
923          isymbols_->AddSymbol(s, l);
924      }
925    }
926
927    if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) {
928      FSTERROR() << "GallicToNewSymbolMapper: unrepesentable weight";
929      error_ = true;
930    }
931
932    return A(arc.ilabel, l, w2, arc.nextstate);
933  }
934
935  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }
936
937  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
938
939  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
940
941  uint64 Properties(uint64 inprops) const {
942    uint64 outprops = inprops & kOLabelInvariantProperties &
943        kWeightInvariantProperties & kAddSuperFinalProperties;
944    if (error_)
945      outprops |= kError;
946    return outprops;
947  }
948
949 private:
950  class StringKey {
951   public:
952    size_t operator()(const SW &x) const {
953      return x.Hash();
954    }
955  };
956
957  typedef unordered_map<SW, Label, StringKey> Map;
958
959  MutableFst<ToArc> *fst_;
960  Map map_;
961  Label lmax_;
962  StateId state_;
963  const SymbolTable *osymbols_;
964  SymbolTable *isymbols_;
965  mutable bool error_;
966
967  DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper);
968};
969
970
971// Mapper to add a constant to all weights.
972template <class A>
973struct PlusMapper {
974  typedef A FromArc;
975  typedef A ToArc;
976  typedef typename A::Weight Weight;
977
978  explicit PlusMapper(Weight w) : weight_(w) {}
979
980  A operator()(const A &arc) const {
981    if (arc.weight == Weight::Zero())
982      return arc;
983    Weight w = Plus(arc.weight, weight_);
984    return A(arc.ilabel, arc.olabel, w, arc.nextstate);
985  }
986
987  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
988
989  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
990
991  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
992
993  uint64 Properties(uint64 props) const {
994    return props & kWeightInvariantProperties;
995  }
996
997 private:
998
999
1000
1001  Weight weight_;
1002};
1003
1004
1005// Mapper to (right) multiply a constant to all weights.
1006template <class A>
1007struct TimesMapper {
1008  typedef A FromArc;
1009  typedef A ToArc;
1010  typedef typename A::Weight Weight;
1011
1012  explicit TimesMapper(Weight w) : weight_(w) {}
1013
1014  A operator()(const A &arc) const {
1015    if (arc.weight == Weight::Zero())
1016      return arc;
1017    Weight w = Times(arc.weight, weight_);
1018    return A(arc.ilabel, arc.olabel, w, arc.nextstate);
1019  }
1020
1021  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
1022
1023  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
1024
1025  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
1026
1027  uint64 Properties(uint64 props) const {
1028    return props & kWeightInvariantProperties;
1029  }
1030
1031 private:
1032  Weight weight_;
1033};
1034
1035
1036// Mapper to reciprocate all non-Zero() weights.
1037template <class A>
1038struct InvertWeightMapper {
1039  typedef A FromArc;
1040  typedef A ToArc;
1041  typedef typename A::Weight Weight;
1042
1043  A operator()(const A &arc) const {
1044    if (arc.weight == Weight::Zero())
1045      return arc;
1046    Weight w = Divide(Weight::One(), arc.weight);
1047    return A(arc.ilabel, arc.olabel, w, arc.nextstate);
1048  }
1049
1050  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
1051
1052  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
1053
1054  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
1055
1056  uint64 Properties(uint64 props) const {
1057    return props & kWeightInvariantProperties;
1058  }
1059};
1060
1061
1062// Mapper to map all non-Zero() weights to One().
1063template <class A, class B = A>
1064struct RmWeightMapper {
1065  typedef A FromArc;
1066  typedef B ToArc;
1067  typedef typename FromArc::Weight FromWeight;
1068  typedef typename ToArc::Weight ToWeight;
1069
1070  B operator()(const A &arc) const {
1071    ToWeight w = arc.weight != FromWeight::Zero() ?
1072                   ToWeight::One() : ToWeight::Zero();
1073    return B(arc.ilabel, arc.olabel, w, arc.nextstate);
1074  }
1075
1076  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
1077
1078  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
1079
1080  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
1081
1082  uint64 Properties(uint64 props) const {
1083    return (props & kWeightInvariantProperties) | kUnweighted;
1084  }
1085};
1086
1087
1088// Mapper to quantize all weights.
1089template <class A, class B = A>
1090struct QuantizeMapper {
1091  typedef A FromArc;
1092  typedef B ToArc;
1093  typedef typename FromArc::Weight FromWeight;
1094  typedef typename ToArc::Weight ToWeight;
1095
1096  QuantizeMapper() : delta_(kDelta) {}
1097
1098  explicit QuantizeMapper(float d) : delta_(d) {}
1099
1100  B operator()(const A &arc) const {
1101    ToWeight w = arc.weight.Quantize(delta_);
1102    return B(arc.ilabel, arc.olabel, w, arc.nextstate);
1103  }
1104
1105  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
1106
1107  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
1108
1109  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
1110
1111  uint64 Properties(uint64 props) const {
1112    return props & kWeightInvariantProperties;
1113  }
1114
1115 private:
1116  float delta_;
1117};
1118
1119
1120// Mapper from A to B under the assumption:
1121//    B::Weight = A::Weight::ReverseWeight
1122//    B::Label == A::Label
1123//    B::StateId == A::StateId
1124// The weight is reversed, while the label and nextstate preserved
1125// in the mapping.
1126template <class A, class B>
1127struct ReverseWeightMapper {
1128  typedef A FromArc;
1129  typedef B ToArc;
1130
1131  B operator()(const A &arc) const {
1132    return B(arc.ilabel, arc.olabel, arc.weight.Reverse(), arc.nextstate);
1133  }
1134
1135  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
1136
1137  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }
1138
1139  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}
1140
1141  uint64 Properties(uint64 props) const { return props; }
1142};
1143
1144}  // namespace fst
1145
1146#endif  // FST_LIB_ARC_MAP_H__
1147