edit-fst.h revision 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea
1010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
2010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Licensed under the Apache License, Version 2.0 (the "License");
3010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// you may not use this file except in compliance with the License.
4010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// You may obtain a copy of the License at
5010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
6010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//     http://www.apache.org/licenses/LICENSE-2.0
7010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
8010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Unless required by applicable law or agreed to in writing, software
9010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// distributed under the License is distributed on an "AS IS" BASIS,
10010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// See the License for the specific language governing permissions and
12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// limitations under the License.
13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//
14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Copyright 2005-2010 Google, Inc.
15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Author: dbikel@google.com (Dan Bikel)
16010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//
17f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// An \ref Fst implementation that allows non-destructive edit operations on an
1846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)// existing fst.
19f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
20f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#ifndef FST_LIB_EDIT_FST_H_
21f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#define FST_LIB_EDIT_FST_H_
22f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
23010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <vector>
24f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)using std::vector;
25f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
26010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <fst/cache.h>
27010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
28010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <tr1/unordered_map>
29010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using std::tr1::unordered_map;
30010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using std::tr1::unordered_multimap;
31010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace fst {
33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The EditFst class enables non-destructive edit operations on a wrapped
35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// ExpandedFst. The implementation uses copy-on-write semantics at the node
36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// level: if a user has an underlying fst on which he or she wants to perform a
37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// relatively small number of edits (read: mutations), then this implementation
38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// will copy the edited node to an internal MutableFst and perform any edits in
39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// situ on that copied node. This class supports all the methods of MutableFst
40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be
41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// added, and one may add transitions from existing nodes of the wrapped fst to
42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// new nodes.
43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//
44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// N.B.: The documentation for Fst::Copy(true) says that its behavior is
45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// undefined if invoked on an fst that has already been accessed.  This class
46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// requires that the Fst implementation it wraps provides consistent, reliable
47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// behavior when its Copy(true) method is invoked, where consistent means
48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// the graph structure, graph properties and state numbering and do not change.
49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// VectorFst and CompactFst, for example, are both well-behaved in this regard.
50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The EditFstData class is a container for all mutable data for EditFstImpl;
52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// also, this class provides most of the actual implementation of what EditFst
53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// does (that is, most of EditFstImpl's methods delegate to methods in this, the
54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// EditFstData class).  Instances of this class are reference-counted and can be
55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// shared between otherwise independent EditFstImpl instances. This scheme
56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// allows EditFstImpl to implement the thread-safe, copy-on-write semantics
57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// required by Fst::Copy(true).
58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//
59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// template parameters:
60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   A the type of arc to use
61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   WrappedFstT the type of fst wrapped by the EditFst instance that
62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//     this EditFstData instance is backing
63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   MutableFstT the type of mutable fst to use internally for edited states;
64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//     crucially, MutableFstT::Copy(false) *must* yield an fst that is
6546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)//     thread-safe for reading (VectorFst, for example, has this property)
6646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)template <typename A,
67010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)          typename WrappedFstT = ExpandedFst<A>,
68010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)          typename MutableFstT = VectorFst<A> >
69010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)class EditFstData {
70010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) public:
71010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  typedef A Arc;
72010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  typedef typename A::Weight Weight;
73010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  typedef typename A::StateId StateId;
74010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  typedef typename unordered_map<StateId, StateId>::const_iterator
75010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      IdMapIterator;
76010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  typedef typename unordered_map<StateId, Weight>::const_iterator
77010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      FinalWeightIterator;
78010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
79010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  EditFstData() : num_new_states_(0) {
81010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    SetEmptyAndDeleteKeysForInternalMaps();
82010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  EditFstData(const EditFstData &other) :
85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      edits_(other.edits_),
86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      external_to_internal_ids_(other.external_to_internal_ids_),
87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      edited_final_weights_(other.edited_final_weights_),
88010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      num_new_states_(other.num_new_states_) {
89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
90010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
91f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  ~EditFstData() {
92010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
93010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
94010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm,
95010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)                                                        const FstReadOptions &opts);
96010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  bool Write(ostream &strm, const FstWriteOptions &opts) const {
98f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Serialize all private data members of this class.
99f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    FstWriteOptions edits_opts(opts);
100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_opts.write_header = true;  // Force writing contained header.
101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.Write(strm, edits_opts);
102f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    WriteType(strm, external_to_internal_ids_);
103f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    WriteType(strm, edited_final_weights_);
104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    WriteType(strm, num_new_states_);
105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (!strm) {
106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source;
107f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return false;
108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return true;
110f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
112f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  int RefCount() const { return ref_count_.count(); }
113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  int IncrRefCount() { return ref_count_.Incr(); }
114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  int DecrRefCount() { return ref_count_.Decr(); }
115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  StateId NumNewStates() const {
117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return num_new_states_;
118010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
120010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // accessor methods for the fst holding edited states
121010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  StateId EditedStart() const {
122010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    return edits_.Start();
123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Weight Final(StateId s, const WrappedFstT *wrapped) const {
126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    FinalWeightIterator final_weight_it = GetFinalWeightIterator(s);
127f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (final_weight_it == NotInFinalWeightMap()) {
128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      IdMapIterator it = GetEditedIdMapIterator(s);
129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return it == NotInEditedMap() ?
130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)             wrapped->Final(s) : edits_.Final(it->second);
131010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    }
132010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    else {
133010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      return final_weight_it->second;
134010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    }
135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
136f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
137010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  size_t NumArcs(StateId s, const WrappedFstT *wrapped) const {
138010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    IdMapIterator it = GetEditedIdMapIterator(s);
139f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return it == NotInEditedMap() ?
140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           wrapped->NumArcs(s) : edits_.NumArcs(it->second);
141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
142f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
143010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const {
144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    IdMapIterator it = GetEditedIdMapIterator(s);
145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return it == NotInEditedMap() ?
146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           wrapped->NumInputEpsilons(s) :
147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           edits_.NumInputEpsilons(it->second);
148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
149010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
150f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const {
151010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    IdMapIterator it = GetEditedIdMapIterator(s);
152f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return it == NotInEditedMap() ?
153f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           wrapped->NumOutputEpsilons(s) :
154f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)           edits_.NumOutputEpsilons(it->second);
155f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
157f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void SetEditedProperties(uint64 props, uint64 mask) {
158f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.SetProperties(props, mask);
159f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
160f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
161f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // non-const MutableFst operations
162f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
163f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Sets the start state for this fst.
164f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void SetStart(StateId s) {
165f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.SetStart(s);
166f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
167f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
168010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // Sets the final state for this fst.
169f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) {
170f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    Weight old_weight = Final(s, wrapped);
171f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    IdMapIterator it = GetEditedIdMapIterator(s);
172f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // if we haven't already edited state s, don't add it to edited_ (which can
173f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // be expensive if s has many transitions); just use the
174f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // edited_final_weights_ map
175f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (it == NotInEditedMap()) {
176f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      edited_final_weights_[s] = w;
177f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
178f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    else {
179010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      edits_.SetFinal(GetEditableInternalId(s, wrapped), w);
180010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    }
181010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    return old_weight;
182010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
183010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
184010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // Adds a new state to this fst, initially with no arcs.
185010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  StateId AddState(StateId curr_num_states) {
186010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    StateId internal_state_id = edits_.AddState();
187010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    StateId external_state_id = curr_num_states;
188116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    external_to_internal_ids_[external_state_id] = internal_state_id;
189116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    num_new_states_++;
190116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    return external_state_id;
191116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  }
192116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
193116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  // Adds the specified arc to the specified state of this fst.
194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) {
195116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    StateId internal_id = GetEditableInternalId(s, wrapped);
196116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
197116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    size_t num_arcs = edits_.NumArcs(internal_id);
198f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    ArcIterator<MutableFstT> arc_it(edits_, internal_id);
199f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    const A *prev_arc = NULL;
200f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (num_arcs > 0) {
201f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // grab the final arc associated with this state in edits_
202f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      arc_it.Seek(num_arcs - 1);
203f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      prev_arc = &(arc_it.Value());
204f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
205f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.AddArc(internal_id, arc);
206f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return prev_arc;
207f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
208f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
209f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void DeleteStates() {
210f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.DeleteStates();
211f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    num_new_states_ = 0;
212f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    external_to_internal_ids_.clear();
213f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edited_final_weights_.clear();
214f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
215f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
216f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Removes all but the first n outgoing arcs of the specified state.
217f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) {
218f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n);
219f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
220f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
221f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Removes all outgoing arcs from the specified state.
222f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void DeleteArcs(StateId s, const WrappedFstT *wrapped) {
223f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    edits_.DeleteArcs(GetEditableInternalId(s, wrapped));
224f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
225f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
226f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // end methods for non-const MutableFst operations
227f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
228f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Provides information for the generic arc iterator.
229f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data,
230f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                       const WrappedFstT *wrapped) const {
231f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    IdMapIterator id_map_it = GetEditedIdMapIterator(s);
232f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (id_map_it == NotInEditedMap()) {
233f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      VLOG(3) << "EditFstData::InitArcIterator: iterating on state "
234f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          << s << " of original fst";
235f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      wrapped->InitArcIterator(s, data);
236f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    } else {
237f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state "
238f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          << s << " (internal state id: " << id_map_it->second << ")";
239f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      edits_.InitArcIterator(id_map_it->second, data);
240f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
241f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
242f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
243f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // Provides information for the generic mutable arc iterator.
244f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data,
245f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                              const WrappedFstT *wrapped) {
246f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    data->base =
247f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        new MutableArcIterator<MutableFstT>(&edits_,
248f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                            GetEditableInternalId(s, wrapped));
249f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
250f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
251f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Prints out the map from external to internal state id's (for debugging
252f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // purposes).
253f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void PrintMap() {
254f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    for (IdMapIterator map_it = external_to_internal_ids_.begin();
255f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        map_it != NotInEditedMap(); ++map_it) {
256f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      LOG(INFO) << "(external,internal)=("
257f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          << map_it->first << "," << map_it->second << ")";
258f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
259f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
260f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
261f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
262f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) private:
263f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  void SetEmptyAndDeleteKeysForInternalMaps() {
264f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
265f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
266f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Returns the iterator of the map from external to internal state id's
267f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // of edits_ for the specified external state id.
268f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  IdMapIterator GetEditedIdMapIterator(StateId s) const {
269f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return external_to_internal_ids_.find(s);
270f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
271f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  IdMapIterator NotInEditedMap() const {
272f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return external_to_internal_ids_.end();
273f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
274f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
275f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  FinalWeightIterator GetFinalWeightIterator(StateId s) const {
276f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return edited_final_weights_.find(s);
277f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
278f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  FinalWeightIterator NotInFinalWeightMap() const {
279f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return edited_final_weights_.end();
280116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  }
281f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
282f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Returns the internal state id of the specified external id if the state has
283f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // already been made editable, or else copies the state from wrapped_
284f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // to edits_ and returns the state id of the newly editable state in edits_.
285f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //
286f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // \return makes the specified state editable if it isn't already and returns
287f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  //         its state id in edits_
288f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) {
289f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    IdMapIterator id_map_it = GetEditedIdMapIterator(s);
290f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (id_map_it == NotInEditedMap()) {
291f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      StateId new_internal_id = edits_.AddState();
292f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s
293f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          << " of original fst; new internal state id:" << new_internal_id;
294f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      external_to_internal_ids_[s] = new_internal_id;
295f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      for (ArcIterator< Fst<A> > arc_iterator(*wrapped, s);
296f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          !arc_iterator.Done();
297f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          arc_iterator.Next()) {
298f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        edits_.AddArc(new_internal_id, arc_iterator.Value());
299f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      }
300f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      // copy the final weight
301f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      FinalWeightIterator final_weight_it = GetFinalWeightIterator(s);
302f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      if (final_weight_it == NotInFinalWeightMap()) {
303f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        edits_.SetFinal(new_internal_id, wrapped->Final(s));
304f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      } else {
305f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        edits_.SetFinal(new_internal_id, final_weight_it->second);
306f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        edited_final_weights_.erase(s);
307f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      }
308f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return new_internal_id;
309f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    } else {
310f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return id_map_it->second;
311f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    }
312f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
313f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
314f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // A mutable fst (by default, a VectorFst) to contain new states, and/or
315f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // copies of states from a wrapped ExpandedFst that have been modified in
316f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // some way.
317f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  MutableFstT edits_;
318f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // A mapping from external state id's to the internal id's of states that
319f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // appear in edits_.
320f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  unordered_map<StateId, StateId> external_to_internal_ids_;
321f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // A mapping from external state id's to final state weights assigned to
322f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // those states.  The states in this map are *only* those whose final weight
323f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // has been modified; if any other part of the state has been modified,
324f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // the entire state is copied to edits_, and all modifications reside there.
325f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  unordered_map<StateId, Weight> edited_final_weights_;
326f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // The number of new states added to this mutable fst impl, which is <= the
327f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // number of states in edits_ (since edits_ contains both edited *and* new
328f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // states).
329f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  StateId num_new_states_;
330f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  RefCounter ref_count_;
331f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)};
332f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
333f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// EditFstData method implementations: just the Read method.
334f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)template <typename A, typename WrappedFstT, typename MutableFstT>
335f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)EditFstData<A, WrappedFstT, MutableFstT> *
336f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm,
337f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                               const FstReadOptions &opts) {
338f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  EditFstData<A, WrappedFstT, MutableFstT> *data =
339f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      new EditFstData<A, WrappedFstT, MutableFstT>();
340f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // next read in MutabelFstT machine that stores edits
341f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  FstReadOptions edits_opts(opts);
342f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  edits_opts.header = 0;  // Contained header was written out, so read it in.
343f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
344f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Because our internal representation of edited states is a solid object
345f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // of type MutableFstT (defaults to VectorFst<A>) and not a pointer,
346f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // and because the static Read method allocates a new object on the heap,
347f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // we need to call Read, check if there was a failure, use
348f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // MutableFstT::operator= to assign the object (not the pointer) to the
349f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // edits_ data member (which will increase the ref count by 1 on the impl)
350f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // and, finally, delete the heap-allocated object.
351f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  MutableFstT *edits = MutableFstT::Read(strm, edits_opts);
352f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (!edits) {
353f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return 0;
354f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
355f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  data->edits_ = *edits;
356f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  delete edits;
357f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // finally, read in rest of private data members
358f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  ReadType(strm, &data->external_to_internal_ids_);
359f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  ReadType(strm, &data->edited_final_weights_);
360f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  ReadType(strm, &data->num_new_states_);
361f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (!strm) {
362f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    LOG(ERROR) << "EditFst::Read: read failed: " << opts.source;
363f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return 0;
364f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
365f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return data;
366f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
367f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
368f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// This class enables non-destructive edit operations on a wrapped ExpandedFst.
369f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The implementation uses copy-on-write semantics at the node level: if a user
370f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// has an underlying fst on which he or she wants to perform a relatively small
371f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// number of edits (read: mutations), then this implementation will copy the
372f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// edited node to an internal MutableFst and perform any edits in situ on that
373f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// copied node. This class supports all the methods of MutableFst except for
374f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and
375f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// one may add transitions from existing nodes of the wrapped fst to new nodes.
376f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//
377f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// template parameters:
378f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   A the type of arc to use
379f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   WrappedFstT the type of fst wrapped by the EditFst instance that
380f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//     this EditFstImpl instance is backing
381f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//   MutableFstT the type of mutable fst to use internally for edited states;
382f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)//     crucially, MutableFstT::Copy(false) *must* yield an fst that is
383010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)//     thread-safe for reading (VectorFst, for example, has this property)
384010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)template <typename A,
385116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch          typename WrappedFstT = ExpandedFst<A>,
386f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          typename MutableFstT = VectorFst<A> >
387f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)class EditFstImpl : public FstImpl<A> {
388f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) public:
389f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  using FstImpl<A>::SetProperties;
390f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  using FstImpl<A>::SetInputSymbols;
391f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  using FstImpl<A>::SetOutputSymbols;
392f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  using FstImpl<A>::WriteHeader;
393f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
3941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  typedef A Arc;
3951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  typedef typename Arc::Weight Weight;
3961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  typedef typename Arc::StateId StateId;
3971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // Constructs an editable fst implementation with no states.  Effectively,
3991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // this initially-empty fst will in every way mimic the behavior of
4001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly
4011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // slower performance (by a constant factor), due to the fact that
4021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  // this class maintains a mapping between external state id's and
403f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // their internal equivalents.
404f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  EditFstImpl() {
405f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    FstImpl<A>::SetType("edit");
406f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    wrapped_ = new MutableFstT();
407f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    InheritPropertiesFromWrapped();
408f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
409f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
410f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
411f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // Wraps the specified ExpandedFst. This constructor requires that the
412f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // specified Fst is an ExpandedFst instance. This requirement is only enforced
413f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // at runtime. (See below for the reason.)
414116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  //
415f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // This library uses the pointer-to-implementation or "PIMPL" design pattern.
416f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // In particular, to make it convenient to bind an implementation class to its
417f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // interface, there are a pair of template "binder" classes, one for immutable
418f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively).
419f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // As it happens, the API for the ImplToMutableFst<I,F> class requires that
420f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // the implementation class--the template parameter "I"--have a constructor
421f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // taking a const Fst<A> reference.  Accordingly, the constructor here must
422f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // perform a static_cast to the WrappedFstT type required by EditFst and
423f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // therefore EditFstImpl.
424f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  explicit EditFstImpl(const Fst<A> &wrapped)
425010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) {
426f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    FstImpl<A>::SetType("edit");
427f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
428f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    data_ = new EditFstData<A, WrappedFstT, MutableFstT>();
429f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    // have edits_ inherit all properties from wrapped_
430010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false),
431f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                               kFstProperties);
432010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    InheritPropertiesFromWrapped();
433010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
434010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
435010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // A copy constructor for this implementation class, used to implement
436010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  // the Copy() method of the Fst interface.
437  EditFstImpl(const EditFstImpl &impl)
438      : FstImpl<A>(),
439        wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))),
440        data_(impl.data_) {
441    data_->IncrRefCount();
442    SetProperties(impl.Properties());
443  }
444
445  ~EditFstImpl() {
446    delete wrapped_;
447    if (!data_->DecrRefCount()) {
448      delete data_;
449    }
450  }
451
452  // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst
453  // interfaces
454  StateId Start() const {
455    StateId edited_start = data_->EditedStart();
456    return edited_start == kNoStateId ? wrapped_->Start() : edited_start;
457  }
458
459  Weight Final(StateId s) const {
460    return data_->Final(s, wrapped_);
461  }
462
463  size_t NumArcs(StateId s) const {
464    return data_->NumArcs(s, wrapped_);
465  }
466
467  size_t NumInputEpsilons(StateId s) const {
468    return data_->NumInputEpsilons(s, wrapped_);
469  }
470
471  size_t NumOutputEpsilons(StateId s) const {
472    return data_->NumOutputEpsilons(s, wrapped_);
473  }
474
475  StateId NumStates() const {
476    return wrapped_->NumStates() + data_->NumNewStates();
477  }
478
479  static EditFstImpl<A, WrappedFstT, MutableFstT> *
480  Read(istream &strm,
481       const FstReadOptions &opts);
482
483  bool Write(ostream &strm, const FstWriteOptions &opts) const {
484    FstHeader hdr;
485    hdr.SetStart(Start());
486    hdr.SetNumStates(NumStates());
487    FstWriteOptions header_opts(opts);
488    header_opts.write_isymbols = false;  // Let contained FST hold any symbols.
489    header_opts.write_osymbols = false;
490    WriteHeader(strm, header_opts, kFileVersion, &hdr);
491
492    // First, serialize wrapped fst to stream.
493    FstWriteOptions wrapped_opts(opts);
494    wrapped_opts.write_header = true;  // Force writing contained header.
495    wrapped_->Write(strm, wrapped_opts);
496
497    data_->Write(strm, opts);
498
499    strm.flush();
500    if (!strm) {
501      LOG(ERROR) << "EditFst::Write: write failed: " << opts.source;
502      return false;
503    }
504    return true;
505  }
506  // end const Fst operations
507
508  // non-const MutableFst operations
509
510  // Sets the start state for this fst.
511  void SetStart(StateId s) {
512    MutateCheck();
513    data_->SetStart(s);
514    SetProperties(SetStartProperties(FstImpl<A>::Properties()));
515  }
516
517  // Sets the final state for this fst.
518  void SetFinal(StateId s, Weight w) {
519    MutateCheck();
520    Weight old_weight = data_->SetFinal(s, w, wrapped_);
521    SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w));
522  }
523
524  // Adds a new state to this fst, initially with no arcs.
525  StateId AddState() {
526    MutateCheck();
527    SetProperties(AddStateProperties(FstImpl<A>::Properties()));
528    return data_->AddState(NumStates());
529  }
530
531  // Adds the specified arc to the specified state of this fst.
532  void AddArc(StateId s, const Arc &arc) {
533    MutateCheck();
534    const A *prev_arc = data_->AddArc(s, arc, wrapped_);
535    SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc));
536  }
537
538  void DeleteStates(const vector<StateId>& dstates) {
539    FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): "
540               << " not implemented";
541    SetProperties(kError, kError);
542  }
543
544  // Deletes all states in this fst.
545  void DeleteStates();
546
547  // Removes all but the first n outgoing arcs of the specified state.
548  void DeleteArcs(StateId s, size_t n) {
549    MutateCheck();
550    data_->DeleteArcs(s, n, wrapped_);
551    SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
552  }
553
554  // Removes all outgoing arcs from the specified state.
555  void DeleteArcs(StateId s) {
556    MutateCheck();
557    data_->DeleteArcs(s, wrapped_);
558    SetProperties(DeleteArcsProperties(FstImpl<A>::Properties()));
559  }
560
561  void ReserveStates(StateId s) {
562  }
563
564  void ReserveArcs(StateId s, size_t n) {
565  }
566
567  // end non-const MutableFst operations
568
569  // Provides information for the generic state iterator.
570  void InitStateIterator(StateIteratorData<Arc> *data) const {
571    data->base = 0;
572    data->nstates = NumStates();
573  }
574
575  // Provides information for the generic arc iterator.
576  void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
577    data_->InitArcIterator(s, data, wrapped_);
578  }
579
580  // Provides information for the generic mutable arc iterator.
581  void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
582    MutateCheck();
583    data_->InitMutableArcIterator(s, data, wrapped_);
584  }
585
586 private:
587  typedef typename unordered_map<StateId, StateId>::const_iterator
588    IdMapIterator;
589  typedef typename unordered_map<StateId, Weight>::const_iterator
590    FinalWeightIterator;
591  // Properties always true of this Fst class
592  static const uint64 kStaticProperties = kExpanded | kMutable;
593  // Current file format version
594  static const int kFileVersion = 2;
595  // Minimum file format version supported
596  static const int kMinFileVersion = 2;
597
598  // Causes this fst to inherit all the properties from its wrapped fst, except
599  // for the two properties that always apply to EditFst instances: kExpanded
600  // and kMutable.
601  void InheritPropertiesFromWrapped() {
602    SetProperties(wrapped_->Properties(kCopyProperties, false) |
603                  kStaticProperties);
604    SetInputSymbols(wrapped_->InputSymbols());
605    SetOutputSymbols(wrapped_->OutputSymbols());
606  }
607
608  // This method ensures that any operations that alter the mutable data
609  // portion of this EditFstImpl cause the data_ member to be copied when its
610  // reference count is greater than 1.  Note that this method is distinct from
611  // MutableFst::Mutate, which gets invoked whenever one of the basic mutation
612  // methods defined in MutableFst is invoked, such as SetInputSymbols.
613  // The MutateCheck here in EditFstImpl is invoked whenever one of the
614  // mutating methods specifically related to the types of edits provided
615  // by EditFst is performed, such as changing an arc of an existing state
616  // of the wrapped fst via a MutableArcIterator, or adding a new state via
617  // AddState().
618  void MutateCheck() {
619    if (data_->RefCount() > 1) {
620      EditFstData<A, WrappedFstT, MutableFstT> *data_copy =
621          new EditFstData<A, WrappedFstT, MutableFstT>(*data_);
622      if (data_ && !data_->DecrRefCount()) {
623        delete data_;
624      }
625      data_ = data_copy;
626    }
627  }
628
629  // The fst that this fst wraps.  The purpose of this class is to enable
630  // non-destructive edits on this wrapped fst.
631  const WrappedFstT *wrapped_;
632  // The mutable data for this EditFst instance, with delegates for all the
633  // methods that can mutate data.
634  EditFstData<A, WrappedFstT, MutableFstT> *data_;
635};
636
637template <typename A, typename WrappedFstT, typename MutableFstT>
638const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties;
639
640// EditFstImpl IMPLEMENTATION STARTS HERE
641
642template<typename A, typename WrappedFstT, typename MutableFstT>
643inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() {
644  data_->DeleteStates();
645  delete wrapped_;
646  // we are deleting all states, so just forget about pointer to wrapped_
647  // and do what default constructor does: set wrapped_ to a new VectorFst
648  wrapped_ = new MutableFstT();
649  uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(),
650                                              kStaticProperties);
651  FstImpl<A>::SetProperties(newProps);
652}
653
654template <typename A, typename WrappedFstT, typename MutableFstT>
655EditFstImpl<A, WrappedFstT, MutableFstT> *
656EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm,
657                                               const FstReadOptions &opts) {
658  EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl();
659  FstHeader hdr;
660  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
661    return 0;
662  }
663  impl->SetStart(hdr.Start());
664
665  // first, read in wrapped fst
666  FstReadOptions wrapped_opts(opts);
667  wrapped_opts.header = 0;  // Contained header was written out, so read it in.
668  Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts);
669  if (!wrapped_fst) {
670    return 0;
671  }
672  impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst);
673
674  impl->data_ = EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts);
675
676  if (!impl->data_) {
677    delete wrapped_fst;
678    return 0;
679  }
680
681  return impl;
682}
683
684// END EditFstImpl IMPLEMENTATION
685
686// Concrete, editable FST.  This class attaches interface to implementation.
687template <typename A,
688          typename WrappedFstT = ExpandedFst<A>,
689          typename MutableFstT = VectorFst<A> >
690class EditFst :
691  public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > {
692 public:
693  friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >;
694
695  typedef A Arc;
696  typedef typename A::StateId StateId;
697  typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl;
698
699  EditFst() : ImplToMutableFst<Impl>(new Impl()) {}
700
701  explicit EditFst(const Fst<A> &fst) :
702      ImplToMutableFst<Impl>(new Impl(fst)) {}
703
704  explicit EditFst(const WrappedFstT &fst) :
705      ImplToMutableFst<Impl>(new Impl(fst)) {}
706
707  // See Fst<>::Copy() for doc.
708  EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) :
709    ImplToMutableFst<Impl>(fst, safe) {}
710
711  virtual ~EditFst() {}
712
713  // Get a copy of this EditFst. See Fst<>::Copy() for further doc.
714  virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const {
715    return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe);
716  }
717
718  EditFst<A, WrappedFstT, MutableFstT> &
719  operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) {
720    SetImpl(fst.GetImpl(), false);
721    return *this;
722  }
723
724  virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) {
725    if (this != &fst) {
726      SetImpl(new Impl(fst));
727    }
728    return *this;
729  }
730
731  // Read an EditFst from an input stream; return NULL on error.
732  static EditFst<A, WrappedFstT, MutableFstT> *
733  Read(istream &strm,
734       const FstReadOptions &opts) {
735    Impl* impl = Impl::Read(strm, opts);
736    return impl ? new EditFst<A>(impl) : 0;
737  }
738
739  // Read an EditFst from a file; return NULL on error.
740  // Empty filename reads from standard input.
741  static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) {
742    Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename);
743    return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0;
744  }
745
746  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
747    return GetImpl()->Write(strm, opts);
748  }
749
750  virtual bool Write(const string &filename) const {
751    return Fst<A>::WriteFile(filename);
752  }
753
754  virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
755    GetImpl()->InitStateIterator(data);
756  }
757
758  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
759    GetImpl()->InitArcIterator(s, data);
760  }
761
762  virtual
763  void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) {
764    GetImpl()->InitMutableArcIterator(s, data);
765  }
766 private:
767  explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {}
768
769  // Makes visible to friends.
770  Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); }
771
772  void SetImpl(Impl *impl, bool own_impl = true) {
773    ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl);
774  }
775};
776
777}  // namespace fst
778
779#endif  // FST_LIB_EDIT_FST_H_
780