edit-fst.h revision 5b6dc79427b8f7eeb6a7ff68034ab8548ce670ea
1010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 2010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Licensed under the Apache License, Version 2.0 (the "License"); 3010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// you may not use this file except in compliance with the License. 4010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// You may obtain a copy of the License at 5010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// 6010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// http://www.apache.org/licenses/LICENSE-2.0 7010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// 8010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Unless required by applicable law or agreed to in writing, software 9010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// distributed under the License is distributed on an "AS IS" BASIS, 10010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// See the License for the specific language governing permissions and 12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// limitations under the License. 13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// 14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Copyright 2005-2010 Google, Inc. 15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Author: dbikel@google.com (Dan Bikel) 16010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// 17f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// An \ref Fst implementation that allows non-destructive edit operations on an 1846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)// existing fst. 19f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 20f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#ifndef FST_LIB_EDIT_FST_H_ 21f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#define FST_LIB_EDIT_FST_H_ 22f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 23010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <vector> 24f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)using std::vector; 25f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 26010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <fst/cache.h> 27010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 28010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)#include <tr1/unordered_map> 29010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using std::tr1::unordered_map; 30010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)using std::tr1::unordered_multimap; 31010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace fst { 33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The EditFst class enables non-destructive edit operations on a wrapped 35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// ExpandedFst. The implementation uses copy-on-write semantics at the node 36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// level: if a user has an underlying fst on which he or she wants to perform a 37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// relatively small number of edits (read: mutations), then this implementation 38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// will copy the edited node to an internal MutableFst and perform any edits in 39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// situ on that copied node. This class supports all the methods of MutableFst 40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be 41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// added, and one may add transitions from existing nodes of the wrapped fst to 42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// new nodes. 43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// 44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// N.B.: The documentation for Fst::Copy(true) says that its behavior is 45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// undefined if invoked on an fst that has already been accessed. This class 46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// requires that the Fst implementation it wraps provides consistent, reliable 47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// behavior when its Copy(true) method is invoked, where consistent means 48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// the graph structure, graph properties and state numbering and do not change. 49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// VectorFst and CompactFst, for example, are both well-behaved in this regard. 50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The EditFstData class is a container for all mutable data for EditFstImpl; 52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// also, this class provides most of the actual implementation of what EditFst 53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// does (that is, most of EditFstImpl's methods delegate to methods in this, the 54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// EditFstData class). Instances of this class are reference-counted and can be 55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// shared between otherwise independent EditFstImpl instances. This scheme 56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// allows EditFstImpl to implement the thread-safe, copy-on-write semantics 57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// required by Fst::Copy(true). 58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// 59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// template parameters: 60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// A the type of arc to use 61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// WrappedFstT the type of fst wrapped by the EditFst instance that 62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// this EditFstData instance is backing 63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// MutableFstT the type of mutable fst to use internally for edited states; 64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// crucially, MutableFstT::Copy(false) *must* yield an fst that is 6546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)// thread-safe for reading (VectorFst, for example, has this property) 6646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)template <typename A, 67010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typename WrappedFstT = ExpandedFst<A>, 68010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typename MutableFstT = VectorFst<A> > 69010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)class EditFstData { 70010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) public: 71010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typedef A Arc; 72010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typedef typename A::Weight Weight; 73010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typedef typename A::StateId StateId; 74010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typedef typename unordered_map<StateId, StateId>::const_iterator 75010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) IdMapIterator; 76010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) typedef typename unordered_map<StateId, Weight>::const_iterator 77010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) FinalWeightIterator; 78010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 79010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) EditFstData() : num_new_states_(0) { 81010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) SetEmptyAndDeleteKeysForInternalMaps(); 82010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 83010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 84010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) EditFstData(const EditFstData &other) : 85010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) edits_(other.edits_), 86010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) external_to_internal_ids_(other.external_to_internal_ids_), 87010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) edited_final_weights_(other.edited_final_weights_), 88010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) num_new_states_(other.num_new_states_) { 89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 90010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 91f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ~EditFstData() { 92010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 93010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 94010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm, 95010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) const FstReadOptions &opts); 96010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) bool Write(ostream &strm, const FstWriteOptions &opts) const { 98f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Serialize all private data members of this class. 99f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FstWriteOptions edits_opts(opts); 100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_opts.write_header = true; // Force writing contained header. 101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.Write(strm, edits_opts); 102f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) WriteType(strm, external_to_internal_ids_); 103f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) WriteType(strm, edited_final_weights_); 104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) WriteType(strm, num_new_states_); 105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (!strm) { 106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source; 107f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return true; 110f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 112f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) int RefCount() const { return ref_count_.count(); } 113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) int IncrRefCount() { return ref_count_.Incr(); } 114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) int DecrRefCount() { return ref_count_.Decr(); } 115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) StateId NumNewStates() const { 117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return num_new_states_; 118010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 120010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // accessor methods for the fst holding edited states 121010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StateId EditedStart() const { 122010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) return edits_.Start(); 123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Weight Final(StateId s, const WrappedFstT *wrapped) const { 126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); 127f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (final_weight_it == NotInFinalWeightMap()) { 128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator it = GetEditedIdMapIterator(s); 129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return it == NotInEditedMap() ? 130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped->Final(s) : edits_.Final(it->second); 131010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 132010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) else { 133010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) return final_weight_it->second; 134010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 136f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 137010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) size_t NumArcs(StateId s, const WrappedFstT *wrapped) const { 138010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) IdMapIterator it = GetEditedIdMapIterator(s); 139f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return it == NotInEditedMap() ? 140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped->NumArcs(s) : edits_.NumArcs(it->second); 141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 142f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 143010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const { 144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator it = GetEditedIdMapIterator(s); 145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return it == NotInEditedMap() ? 146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped->NumInputEpsilons(s) : 147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.NumInputEpsilons(it->second); 148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 149010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 150f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const { 151010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) IdMapIterator it = GetEditedIdMapIterator(s); 152f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return it == NotInEditedMap() ? 153f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped->NumOutputEpsilons(s) : 154f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.NumOutputEpsilons(it->second); 155f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 157f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void SetEditedProperties(uint64 props, uint64 mask) { 158f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.SetProperties(props, mask); 159f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 160f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 161f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // non-const MutableFst operations 162f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 163f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Sets the start state for this fst. 164f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void SetStart(StateId s) { 165f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.SetStart(s); 166f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 167f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 168010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Sets the final state for this fst. 169f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) { 170f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Weight old_weight = Final(s, wrapped); 171f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator it = GetEditedIdMapIterator(s); 172f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // if we haven't already edited state s, don't add it to edited_ (which can 173f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // be expensive if s has many transitions); just use the 174f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // edited_final_weights_ map 175f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (it == NotInEditedMap()) { 176f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edited_final_weights_[s] = w; 177f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 178f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) else { 179010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) edits_.SetFinal(GetEditableInternalId(s, wrapped), w); 180010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 181010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) return old_weight; 182010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 183010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 184010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // Adds a new state to this fst, initially with no arcs. 185010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StateId AddState(StateId curr_num_states) { 186010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StateId internal_state_id = edits_.AddState(); 187010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) StateId external_state_id = curr_num_states; 188116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch external_to_internal_ids_[external_state_id] = internal_state_id; 189116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch num_new_states_++; 190116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return external_state_id; 191116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch } 192116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 193116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // Adds the specified arc to the specified state of this fst. 194116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) { 195116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch StateId internal_id = GetEditableInternalId(s, wrapped); 196116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 197116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch size_t num_arcs = edits_.NumArcs(internal_id); 198f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ArcIterator<MutableFstT> arc_it(edits_, internal_id); 199f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const A *prev_arc = NULL; 200f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (num_arcs > 0) { 201f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // grab the final arc associated with this state in edits_ 202f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) arc_it.Seek(num_arcs - 1); 203f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) prev_arc = &(arc_it.Value()); 204f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 205f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.AddArc(internal_id, arc); 206f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return prev_arc; 207f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 208f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 209f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void DeleteStates() { 210f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.DeleteStates(); 211f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) num_new_states_ = 0; 212f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) external_to_internal_ids_.clear(); 213f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edited_final_weights_.clear(); 214f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 215f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 216f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Removes all but the first n outgoing arcs of the specified state. 217f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) { 218f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n); 219f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 220f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 221f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Removes all outgoing arcs from the specified state. 222f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void DeleteArcs(StateId s, const WrappedFstT *wrapped) { 223f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.DeleteArcs(GetEditableInternalId(s, wrapped)); 224f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 225f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 226f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // end methods for non-const MutableFst operations 227f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 228f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Provides information for the generic arc iterator. 229f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void InitArcIterator(StateId s, ArcIteratorData<Arc> *data, 230f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const WrappedFstT *wrapped) const { 231f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator id_map_it = GetEditedIdMapIterator(s); 232f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (id_map_it == NotInEditedMap()) { 233f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) VLOG(3) << "EditFstData::InitArcIterator: iterating on state " 234f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) << s << " of original fst"; 235f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped->InitArcIterator(s, data); 236f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } else { 237f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state " 238f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) << s << " (internal state id: " << id_map_it->second << ")"; 239f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.InitArcIterator(id_map_it->second, data); 240f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 241f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 242f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 243f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Provides information for the generic mutable arc iterator. 244f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data, 245f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const WrappedFstT *wrapped) { 246f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data->base = 247f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) new MutableArcIterator<MutableFstT>(&edits_, 248f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) GetEditableInternalId(s, wrapped)); 249f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 250f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 251f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Prints out the map from external to internal state id's (for debugging 252f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // purposes). 253f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void PrintMap() { 254f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) for (IdMapIterator map_it = external_to_internal_ids_.begin(); 255f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) map_it != NotInEditedMap(); ++map_it) { 256f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) LOG(INFO) << "(external,internal)=(" 257f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) << map_it->first << "," << map_it->second << ")"; 258f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 259f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 260f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 261f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 262f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) private: 263f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) void SetEmptyAndDeleteKeysForInternalMaps() { 264f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 265f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 266f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Returns the iterator of the map from external to internal state id's 267f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // of edits_ for the specified external state id. 268f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator GetEditedIdMapIterator(StateId s) const { 269f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return external_to_internal_ids_.find(s); 270f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 271f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator NotInEditedMap() const { 272f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return external_to_internal_ids_.end(); 273f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 274f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 275f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FinalWeightIterator GetFinalWeightIterator(StateId s) const { 276f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return edited_final_weights_.find(s); 277f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 278f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FinalWeightIterator NotInFinalWeightMap() const { 279f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return edited_final_weights_.end(); 280116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch } 281f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 282f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Returns the internal state id of the specified external id if the state has 283f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // already been made editable, or else copies the state from wrapped_ 284f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // to edits_ and returns the state id of the newly editable state in edits_. 285f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // 286f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // \return makes the specified state editable if it isn't already and returns 287f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // its state id in edits_ 288f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) { 289f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) IdMapIterator id_map_it = GetEditedIdMapIterator(s); 290f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (id_map_it == NotInEditedMap()) { 291f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) StateId new_internal_id = edits_.AddState(); 292f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s 293f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) << " of original fst; new internal state id:" << new_internal_id; 294f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) external_to_internal_ids_[s] = new_internal_id; 295f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) for (ArcIterator< Fst<A> > arc_iterator(*wrapped, s); 296f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) !arc_iterator.Done(); 297f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) arc_iterator.Next()) { 298f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.AddArc(new_internal_id, arc_iterator.Value()); 299f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 300f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // copy the final weight 301f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); 302f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (final_weight_it == NotInFinalWeightMap()) { 303f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.SetFinal(new_internal_id, wrapped->Final(s)); 304f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } else { 305f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_.SetFinal(new_internal_id, final_weight_it->second); 306f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edited_final_weights_.erase(s); 307f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 308f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return new_internal_id; 309f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } else { 310f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return id_map_it->second; 311f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 312f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 313f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 314f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // A mutable fst (by default, a VectorFst) to contain new states, and/or 315f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // copies of states from a wrapped ExpandedFst that have been modified in 316f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // some way. 317f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) MutableFstT edits_; 318f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // A mapping from external state id's to the internal id's of states that 319f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // appear in edits_. 320f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) unordered_map<StateId, StateId> external_to_internal_ids_; 321f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // A mapping from external state id's to final state weights assigned to 322f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // those states. The states in this map are *only* those whose final weight 323f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // has been modified; if any other part of the state has been modified, 324f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // the entire state is copied to edits_, and all modifications reside there. 325f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) unordered_map<StateId, Weight> edited_final_weights_; 326f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // The number of new states added to this mutable fst impl, which is <= the 327f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // number of states in edits_ (since edits_ contains both edited *and* new 328f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // states). 329f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) StateId num_new_states_; 330f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) RefCounter ref_count_; 331f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}; 332f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 333f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// EditFstData method implementations: just the Read method. 334f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)template <typename A, typename WrappedFstT, typename MutableFstT> 335f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)EditFstData<A, WrappedFstT, MutableFstT> * 336f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm, 337f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const FstReadOptions &opts) { 338f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) EditFstData<A, WrappedFstT, MutableFstT> *data = 339f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) new EditFstData<A, WrappedFstT, MutableFstT>(); 340f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // next read in MutabelFstT machine that stores edits 341f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FstReadOptions edits_opts(opts); 342f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) edits_opts.header = 0; // Contained header was written out, so read it in. 343f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 344f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Because our internal representation of edited states is a solid object 345f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // of type MutableFstT (defaults to VectorFst<A>) and not a pointer, 346f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // and because the static Read method allocates a new object on the heap, 347f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // we need to call Read, check if there was a failure, use 348f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // MutableFstT::operator= to assign the object (not the pointer) to the 349f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // edits_ data member (which will increase the ref count by 1 on the impl) 350f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // and, finally, delete the heap-allocated object. 351f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) MutableFstT *edits = MutableFstT::Read(strm, edits_opts); 352f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (!edits) { 353f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 0; 354f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 355f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data->edits_ = *edits; 356f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) delete edits; 357f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // finally, read in rest of private data members 358f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ReadType(strm, &data->external_to_internal_ids_); 359f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ReadType(strm, &data->edited_final_weights_); 360f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) ReadType(strm, &data->num_new_states_); 361f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (!strm) { 362f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) LOG(ERROR) << "EditFst::Read: read failed: " << opts.source; 363f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 0; 364f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 365f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return data; 366f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 367f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 368f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// This class enables non-destructive edit operations on a wrapped ExpandedFst. 369f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The implementation uses copy-on-write semantics at the node level: if a user 370f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// has an underlying fst on which he or she wants to perform a relatively small 371f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// number of edits (read: mutations), then this implementation will copy the 372f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// edited node to an internal MutableFst and perform any edits in situ on that 373f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// copied node. This class supports all the methods of MutableFst except for 374f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and 375f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// one may add transitions from existing nodes of the wrapped fst to new nodes. 376f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// 377f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// template parameters: 378f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// A the type of arc to use 379f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// WrappedFstT the type of fst wrapped by the EditFst instance that 380f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// this EditFstImpl instance is backing 381f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// MutableFstT the type of mutable fst to use internally for edited states; 382f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// crucially, MutableFstT::Copy(false) *must* yield an fst that is 383010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// thread-safe for reading (VectorFst, for example, has this property) 384010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)template <typename A, 385116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch typename WrappedFstT = ExpandedFst<A>, 386f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) typename MutableFstT = VectorFst<A> > 387f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)class EditFstImpl : public FstImpl<A> { 388f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) public: 389f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) using FstImpl<A>::SetProperties; 390f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) using FstImpl<A>::SetInputSymbols; 391f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) using FstImpl<A>::SetOutputSymbols; 392f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) using FstImpl<A>::WriteHeader; 393f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 3941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci typedef A Arc; 3951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci typedef typename Arc::Weight Weight; 3961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci typedef typename Arc::StateId StateId; 3971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 3981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // Constructs an editable fst implementation with no states. Effectively, 3991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // this initially-empty fst will in every way mimic the behavior of 4001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly 4011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // slower performance (by a constant factor), due to the fact that 4021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // this class maintains a mapping between external state id's and 403f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // their internal equivalents. 404f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) EditFstImpl() { 405f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FstImpl<A>::SetType("edit"); 406f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) wrapped_ = new MutableFstT(); 407f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) InheritPropertiesFromWrapped(); 408f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); 409f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 410f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 411f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // Wraps the specified ExpandedFst. This constructor requires that the 412f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // specified Fst is an ExpandedFst instance. This requirement is only enforced 413f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // at runtime. (See below for the reason.) 414116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch // 415f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // This library uses the pointer-to-implementation or "PIMPL" design pattern. 416f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // In particular, to make it convenient to bind an implementation class to its 417f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // interface, there are a pair of template "binder" classes, one for immutable 418f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively). 419f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // As it happens, the API for the ImplToMutableFst<I,F> class requires that 420f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // the implementation class--the template parameter "I"--have a constructor 421f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // taking a const Fst<A> reference. Accordingly, the constructor here must 422f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // perform a static_cast to the WrappedFstT type required by EditFst and 423f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // therefore EditFstImpl. 424f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) explicit EditFstImpl(const Fst<A> &wrapped) 425010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) { 426f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) FstImpl<A>::SetType("edit"); 427f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 428f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); 429f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // have edits_ inherit all properties from wrapped_ 430010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false), 431f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) kFstProperties); 432010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) InheritPropertiesFromWrapped(); 433010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) } 434010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 435010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // A copy constructor for this implementation class, used to implement 436010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) // the Copy() method of the Fst interface. 437 EditFstImpl(const EditFstImpl &impl) 438 : FstImpl<A>(), 439 wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))), 440 data_(impl.data_) { 441 data_->IncrRefCount(); 442 SetProperties(impl.Properties()); 443 } 444 445 ~EditFstImpl() { 446 delete wrapped_; 447 if (!data_->DecrRefCount()) { 448 delete data_; 449 } 450 } 451 452 // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst 453 // interfaces 454 StateId Start() const { 455 StateId edited_start = data_->EditedStart(); 456 return edited_start == kNoStateId ? wrapped_->Start() : edited_start; 457 } 458 459 Weight Final(StateId s) const { 460 return data_->Final(s, wrapped_); 461 } 462 463 size_t NumArcs(StateId s) const { 464 return data_->NumArcs(s, wrapped_); 465 } 466 467 size_t NumInputEpsilons(StateId s) const { 468 return data_->NumInputEpsilons(s, wrapped_); 469 } 470 471 size_t NumOutputEpsilons(StateId s) const { 472 return data_->NumOutputEpsilons(s, wrapped_); 473 } 474 475 StateId NumStates() const { 476 return wrapped_->NumStates() + data_->NumNewStates(); 477 } 478 479 static EditFstImpl<A, WrappedFstT, MutableFstT> * 480 Read(istream &strm, 481 const FstReadOptions &opts); 482 483 bool Write(ostream &strm, const FstWriteOptions &opts) const { 484 FstHeader hdr; 485 hdr.SetStart(Start()); 486 hdr.SetNumStates(NumStates()); 487 FstWriteOptions header_opts(opts); 488 header_opts.write_isymbols = false; // Let contained FST hold any symbols. 489 header_opts.write_osymbols = false; 490 WriteHeader(strm, header_opts, kFileVersion, &hdr); 491 492 // First, serialize wrapped fst to stream. 493 FstWriteOptions wrapped_opts(opts); 494 wrapped_opts.write_header = true; // Force writing contained header. 495 wrapped_->Write(strm, wrapped_opts); 496 497 data_->Write(strm, opts); 498 499 strm.flush(); 500 if (!strm) { 501 LOG(ERROR) << "EditFst::Write: write failed: " << opts.source; 502 return false; 503 } 504 return true; 505 } 506 // end const Fst operations 507 508 // non-const MutableFst operations 509 510 // Sets the start state for this fst. 511 void SetStart(StateId s) { 512 MutateCheck(); 513 data_->SetStart(s); 514 SetProperties(SetStartProperties(FstImpl<A>::Properties())); 515 } 516 517 // Sets the final state for this fst. 518 void SetFinal(StateId s, Weight w) { 519 MutateCheck(); 520 Weight old_weight = data_->SetFinal(s, w, wrapped_); 521 SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w)); 522 } 523 524 // Adds a new state to this fst, initially with no arcs. 525 StateId AddState() { 526 MutateCheck(); 527 SetProperties(AddStateProperties(FstImpl<A>::Properties())); 528 return data_->AddState(NumStates()); 529 } 530 531 // Adds the specified arc to the specified state of this fst. 532 void AddArc(StateId s, const Arc &arc) { 533 MutateCheck(); 534 const A *prev_arc = data_->AddArc(s, arc, wrapped_); 535 SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc)); 536 } 537 538 void DeleteStates(const vector<StateId>& dstates) { 539 FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): " 540 << " not implemented"; 541 SetProperties(kError, kError); 542 } 543 544 // Deletes all states in this fst. 545 void DeleteStates(); 546 547 // Removes all but the first n outgoing arcs of the specified state. 548 void DeleteArcs(StateId s, size_t n) { 549 MutateCheck(); 550 data_->DeleteArcs(s, n, wrapped_); 551 SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); 552 } 553 554 // Removes all outgoing arcs from the specified state. 555 void DeleteArcs(StateId s) { 556 MutateCheck(); 557 data_->DeleteArcs(s, wrapped_); 558 SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); 559 } 560 561 void ReserveStates(StateId s) { 562 } 563 564 void ReserveArcs(StateId s, size_t n) { 565 } 566 567 // end non-const MutableFst operations 568 569 // Provides information for the generic state iterator. 570 void InitStateIterator(StateIteratorData<Arc> *data) const { 571 data->base = 0; 572 data->nstates = NumStates(); 573 } 574 575 // Provides information for the generic arc iterator. 576 void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { 577 data_->InitArcIterator(s, data, wrapped_); 578 } 579 580 // Provides information for the generic mutable arc iterator. 581 void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { 582 MutateCheck(); 583 data_->InitMutableArcIterator(s, data, wrapped_); 584 } 585 586 private: 587 typedef typename unordered_map<StateId, StateId>::const_iterator 588 IdMapIterator; 589 typedef typename unordered_map<StateId, Weight>::const_iterator 590 FinalWeightIterator; 591 // Properties always true of this Fst class 592 static const uint64 kStaticProperties = kExpanded | kMutable; 593 // Current file format version 594 static const int kFileVersion = 2; 595 // Minimum file format version supported 596 static const int kMinFileVersion = 2; 597 598 // Causes this fst to inherit all the properties from its wrapped fst, except 599 // for the two properties that always apply to EditFst instances: kExpanded 600 // and kMutable. 601 void InheritPropertiesFromWrapped() { 602 SetProperties(wrapped_->Properties(kCopyProperties, false) | 603 kStaticProperties); 604 SetInputSymbols(wrapped_->InputSymbols()); 605 SetOutputSymbols(wrapped_->OutputSymbols()); 606 } 607 608 // This method ensures that any operations that alter the mutable data 609 // portion of this EditFstImpl cause the data_ member to be copied when its 610 // reference count is greater than 1. Note that this method is distinct from 611 // MutableFst::Mutate, which gets invoked whenever one of the basic mutation 612 // methods defined in MutableFst is invoked, such as SetInputSymbols. 613 // The MutateCheck here in EditFstImpl is invoked whenever one of the 614 // mutating methods specifically related to the types of edits provided 615 // by EditFst is performed, such as changing an arc of an existing state 616 // of the wrapped fst via a MutableArcIterator, or adding a new state via 617 // AddState(). 618 void MutateCheck() { 619 if (data_->RefCount() > 1) { 620 EditFstData<A, WrappedFstT, MutableFstT> *data_copy = 621 new EditFstData<A, WrappedFstT, MutableFstT>(*data_); 622 if (data_ && !data_->DecrRefCount()) { 623 delete data_; 624 } 625 data_ = data_copy; 626 } 627 } 628 629 // The fst that this fst wraps. The purpose of this class is to enable 630 // non-destructive edits on this wrapped fst. 631 const WrappedFstT *wrapped_; 632 // The mutable data for this EditFst instance, with delegates for all the 633 // methods that can mutate data. 634 EditFstData<A, WrappedFstT, MutableFstT> *data_; 635}; 636 637template <typename A, typename WrappedFstT, typename MutableFstT> 638const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties; 639 640// EditFstImpl IMPLEMENTATION STARTS HERE 641 642template<typename A, typename WrappedFstT, typename MutableFstT> 643inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() { 644 data_->DeleteStates(); 645 delete wrapped_; 646 // we are deleting all states, so just forget about pointer to wrapped_ 647 // and do what default constructor does: set wrapped_ to a new VectorFst 648 wrapped_ = new MutableFstT(); 649 uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(), 650 kStaticProperties); 651 FstImpl<A>::SetProperties(newProps); 652} 653 654template <typename A, typename WrappedFstT, typename MutableFstT> 655EditFstImpl<A, WrappedFstT, MutableFstT> * 656EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm, 657 const FstReadOptions &opts) { 658 EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl(); 659 FstHeader hdr; 660 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { 661 return 0; 662 } 663 impl->SetStart(hdr.Start()); 664 665 // first, read in wrapped fst 666 FstReadOptions wrapped_opts(opts); 667 wrapped_opts.header = 0; // Contained header was written out, so read it in. 668 Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts); 669 if (!wrapped_fst) { 670 return 0; 671 } 672 impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst); 673 674 impl->data_ = EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts); 675 676 if (!impl->data_) { 677 delete wrapped_fst; 678 return 0; 679 } 680 681 return impl; 682} 683 684// END EditFstImpl IMPLEMENTATION 685 686// Concrete, editable FST. This class attaches interface to implementation. 687template <typename A, 688 typename WrappedFstT = ExpandedFst<A>, 689 typename MutableFstT = VectorFst<A> > 690class EditFst : 691 public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > { 692 public: 693 friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >; 694 695 typedef A Arc; 696 typedef typename A::StateId StateId; 697 typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl; 698 699 EditFst() : ImplToMutableFst<Impl>(new Impl()) {} 700 701 explicit EditFst(const Fst<A> &fst) : 702 ImplToMutableFst<Impl>(new Impl(fst)) {} 703 704 explicit EditFst(const WrappedFstT &fst) : 705 ImplToMutableFst<Impl>(new Impl(fst)) {} 706 707 // See Fst<>::Copy() for doc. 708 EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) : 709 ImplToMutableFst<Impl>(fst, safe) {} 710 711 virtual ~EditFst() {} 712 713 // Get a copy of this EditFst. See Fst<>::Copy() for further doc. 714 virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const { 715 return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe); 716 } 717 718 EditFst<A, WrappedFstT, MutableFstT> & 719 operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) { 720 SetImpl(fst.GetImpl(), false); 721 return *this; 722 } 723 724 virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) { 725 if (this != &fst) { 726 SetImpl(new Impl(fst)); 727 } 728 return *this; 729 } 730 731 // Read an EditFst from an input stream; return NULL on error. 732 static EditFst<A, WrappedFstT, MutableFstT> * 733 Read(istream &strm, 734 const FstReadOptions &opts) { 735 Impl* impl = Impl::Read(strm, opts); 736 return impl ? new EditFst<A>(impl) : 0; 737 } 738 739 // Read an EditFst from a file; return NULL on error. 740 // Empty filename reads from standard input. 741 static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) { 742 Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename); 743 return impl ? new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0; 744 } 745 746 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 747 return GetImpl()->Write(strm, opts); 748 } 749 750 virtual bool Write(const string &filename) const { 751 return Fst<A>::WriteFile(filename); 752 } 753 754 virtual void InitStateIterator(StateIteratorData<Arc> *data) const { 755 GetImpl()->InitStateIterator(data); 756 } 757 758 virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { 759 GetImpl()->InitArcIterator(s, data); 760 } 761 762 virtual 763 void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { 764 GetImpl()->InitMutableArcIterator(s, data); 765 } 766 private: 767 explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {} 768 769 // Makes visible to friends. 770 Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); } 771 772 void SetImpl(Impl *impl, bool own_impl = true) { 773 ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl); 774 } 775}; 776 777} // namespace fst 778 779#endif // FST_LIB_EDIT_FST_H_ 780