1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// expand.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expand a PDT to an FST. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_EXPAND_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_EXPAND_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/pdt.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/paren.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/shortest-path.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/reverse.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/queue.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/state-table.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h> 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ExpandFstOptions : public CacheOptions { 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_parentheses; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStack<typename Arc::StateId, typename Arc::Label> *stack; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTable<typename Arc::StateId, typename Arc::StateId> *state_table; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFstOptions( 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts = CacheOptions(), 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool kp = false, 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStack<typename Arc::StateId, typename Arc::Label> *s = 0, 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTable<typename Arc::StateId, typename Arc::StateId> *st = 0) 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheOptions(opts), keep_parentheses(kp), stack(s), state_table(st) {} 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Properties for an expanded PDT. 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsoninline uint64 ExpandProperties(uint64 inprops) { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return inprops & (kAcceptor | kAcyclic | kInitialAcyclic | kUnweighted); 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Implementation class for ExpandFst 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ExpandFstImpl 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheImpl<A> { 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetType; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetProperties; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::Properties; 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetInputSymbols; 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using FstImpl<A>::SetOutputSymbols; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::PushArc; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasArcs; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasFinal; 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::HasStart; 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetArcs; 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetFinal; 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson using CacheBaseImpl< CacheState<A> >::SetStart; 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef StateId StackId; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef PdtStateTuple<StateId, StackId> StateTuple; 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFstImpl(const Fst<A> &fst, 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label> > &parens, 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ExpandFstOptions<A> &opts) 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(opts), fst_(fst.Copy()), 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stack_(opts.stack ? opts.stack: new PdtStack<StateId, Label>(parens)), 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(opts.state_table ? opts.state_table : 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson new PdtStateTable<StateId, StackId>()), 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson own_stack_(opts.stack == 0), own_state_table_(opts.state_table == 0), 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keep_parentheses_(opts.keep_parentheses) { 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("expand"); 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint64 props = fst.Properties(kFstProperties, false); 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(ExpandProperties(props), kCopyProperties); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(fst.InputSymbols()); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(fst.OutputSymbols()); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFstImpl(const ExpandFstImpl &impl) 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheImpl<A>(impl), 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst_(impl.fst_->Copy(true)), 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stack_(new PdtStack<StateId, Label>(*impl.stack_)), 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_table_(new PdtStateTable<StateId, StackId>()), 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson own_stack_(true), own_state_table_(true), 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keep_parentheses_(impl.keep_parentheses_) { 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetType("expand"); 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetProperties(impl.Properties(), kCopyProperties); 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetInputSymbols(impl.InputSymbols()); 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetOutputSymbols(impl.OutputSymbols()); 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~ExpandFstImpl() { 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete fst_; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (own_stack_) 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete stack_; 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (own_state_table_) 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete state_table_; 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId Start() { 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasStart()) { 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = fst_->Start(); 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s == kNoStateId) 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return kNoStateId; 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple(s, 0); 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId start = state_table_->FindState(tuple); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetStart(start); 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Start(); 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight Final(StateId s) { 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasFinal(s)) { 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTuple &tuple = state_table_->Tuple(s); 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = fst_->Final(tuple.state_id); 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (w != Weight::Zero() && tuple.stack_id == 0) 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, w); 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinal(s, Weight::Zero()); 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::Final(s); 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumArcs(StateId s) { 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) { 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandState(s); 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumArcs(s); 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumInputEpsilons(StateId s) { 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandState(s); 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumInputEpsilons(s); 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t NumOutputEpsilons(StateId s) { 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandState(s); 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return CacheImpl<A>::NumOutputEpsilons(s); 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitArcIterator(StateId s, ArcIteratorData<A> *data) { 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!HasArcs(s)) 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandState(s); 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson CacheImpl<A>::InitArcIterator(s, data); 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Computes the outgoing transitions from a state, creating new destination 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // states as needed. 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ExpandState(StateId s) { 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple tuple = state_table_->Tuple(s); 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<A> > aiter(*fst_, tuple.state_id); 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); aiter.Next()) { 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc = aiter.Value(); 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId stack_id = stack_->Find(tuple.stack_id, arc.ilabel); 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_id == -1) { 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Non-matching close parenthesis 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson continue; 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if ((stack_id != tuple.stack_id) && !keep_parentheses_) { 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Stack push/pop 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.ilabel = arc.olabel = 0; 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTuple ntuple(arc.nextstate, stack_id); 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.nextstate = state_table_->FindState(ntuple); 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PushArc(s, arc); 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetArcs(s); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PdtStack<StackId, Label> &GetStack() const { return *stack_; } 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PdtStateTable<StateId, StackId> &GetStateTable() const { 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return *state_table_; 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<A> *fst_; 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStack<StackId, Label> *stack_; 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTable<StateId, StackId> *state_table_; 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool own_stack_; 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool own_state_table_; 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_parentheses_; 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ExpandFstImpl<A> &); // disallow 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expands a pushdown transducer (PDT) encoded as an FST into an FST. 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version is a delayed Fst. In the PDT, some transitions are 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// labeled with open or close parentheses. To be interpreted as a PDT, 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the parens must balance on a path. The open-close parenthesis label 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pairs are passed in 'parens'. The expansion enforces the 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parenthesis constraints. The PDT must be expandable as an FST. 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This class attaches interface to implementation and handles 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reference counting, delegating most methods to ImplToFst. 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ExpandFst : public ImplToFst< ExpandFstImpl<A> > { 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class ArcIterator< ExpandFst<A> >; 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson friend class StateIterator< ExpandFst<A> >; 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef StateId StackId; 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef CacheState<A> State; 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ExpandFstImpl<A> Impl; 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFst(const Fst<A> &fst, 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label> > &parens) 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst, parens, ExpandFstOptions<A>())) {} 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFst(const Fst<A> &fst, 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label> > &parens, 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ExpandFstOptions<A> &opts) 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(new Impl(fst, parens, opts)) {} 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // See Fst<>::Copy() for doc. 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFst(const ExpandFst<A> &fst, bool safe = false) 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ImplToFst<Impl>(fst, safe) {} 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Get a copy of this ExpandFst. See Fst<>::Copy() for further doc. 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual ExpandFst<A> *Copy(bool safe = false) const { 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return new ExpandFst<A>(*this, safe); 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual inline void InitStateIterator(StateIteratorData<A> *data) const; 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GetImpl()->InitArcIterator(s, data); 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PdtStack<StackId, Label> &GetStack() const { 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return GetImpl()->GetStack(); 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PdtStateTable<StateId, StackId> &GetStateTable() const { 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return GetImpl()->GetStateTable(); 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Makes visible to friends. 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void operator=(const ExpandFst<A> &fst); // Disallow 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ExpandFst. 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class A> 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StateIterator< ExpandFst<A> > 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheStateIterator< ExpandFst<A> > { 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson explicit StateIterator(const ExpandFst<A> &fst) 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheStateIterator< ExpandFst<A> >(fst, fst.GetImpl()) {} 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialization for ExpandFst. 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ArcIterator< ExpandFst<A> > 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public CacheArcIterator< ExpandFst<A> > { 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator(const ExpandFst<A> &fst, StateId s) 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : CacheArcIterator< ExpandFst<A> >(fst.GetImpl(), s) { 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!fst.GetImpl()->HasArcs(s)) 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst.GetImpl()->ExpandState(s); 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(ArcIterator); 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> inline 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ExpandFst<A>::InitStateIterator(StateIteratorData<A> *data) const 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson{ 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson data->base = new StateIterator< ExpandFst<A> >(*this); 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// PrunedExpand Class 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Prunes the delayed expansion of a pushdown transducer (PDT) encoded 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// as an FST into an FST. In the PDT, some transitions are labeled 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// with open or close parentheses. To be interpreted as a PDT, the 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parens must balance on a path. The open-close parenthesis label 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pairs are passed in 'parens'. The expansion enforces the 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parenthesis constraints. 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm works by visiting the delayed ExpandFst using a 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// shortest-stack first queue discipline and relies on the 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// shortest-distance information computed using a reverse 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// shortest-path call to perform the pruning. 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm maintains the same state ordering between the ExpandFst 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// being visited 'efst_' and the result of pruning written into the 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// MutableFst 'ofst_' to improve readability of the code. 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass PrunedExpand { 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef StateId StackId; 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef PdtStack<StackId, Label> Stack; 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef PdtStateTable<StateId, StackId> StateTable; 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename PdtBalanceData<Arc>::SetIterator SetIterator; 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Constructor taking as input a PDT specified by 'ifst' and 'parens'. 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'keep_parentheses' specifies whether parentheses are replaced by 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // epsilons or not during the expansion. 'opts' is the cache options 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // used to instantiate the underlying ExpandFst. 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrunedExpand(const Fst<A> &ifst, 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<Label, Label> > &parens, 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_parentheses = false, 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const CacheOptions &opts = CacheOptions()) 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ifst_(ifst.Copy()), 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson keep_parentheses_(keep_parentheses), 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stack_(parens), 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson efst_(ifst, parens, 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFstOptions<Arc>(opts, true, &stack_, &state_table_)), 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson queue_(state_table_, stack_, stack_length_, distance_, fdistance_) { 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Reverse(*ifst_, parens, &rfst_); 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorFst<Arc> path; 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reverse_shortest_path_ = new SP( 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson rfst_, parens, 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtShortestPathOptions<A, FifoQueue<StateId> >(true, false)); 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson reverse_shortest_path_->ShortestPath(&path); 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson balance_data_ = reverse_shortest_path_->GetBalanceData()->Reverse( 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson rfst_.NumStates(), 10, -1); 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson InitCloseParenMultimap(parens); 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ~PrunedExpand() { 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete ifst_; 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete reverse_shortest_path_; 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete balance_data_; 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Expands and prunes with weight threshold 'threshold' the input PDT. 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Writes the result in 'ofst'. 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Expand(MutableFst<A> *ofst, const Weight &threshold); 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const uint8 kEnqueued; 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const uint8 kExpanded; 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson static const uint8 kSourceState; 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Comparison functor used by the queue: 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 1. states corresponding to shortest stack first, 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 2. among stacks of the same length, reverse lexicographic order is used, 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 3. among states with the same stack, shortest-first order is used. 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class StackCompare { 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackCompare(const StateTable &st, 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Stack &s, const vector<StackId> &sl, 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight> &d, const vector<Weight> &fd) 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : state_table_(st), stack_(s), stack_length_(sl), 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance_(d), fdistance_(fd) {} 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(StateId s1, StateId s2) const { 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId si1 = state_table_.Tuple(s1).stack_id; 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId si2 = state_table_.Tuple(s2).stack_id; 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_length_[si1] < stack_length_[si2]) 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_length_[si1] > stack_length_[si2]) 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If stack id equal, use A* 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (si1 == si2) { 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w1 = (s1 < distance_.size()) && (s1 < fdistance_.size()) ? 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(distance_[s1], fdistance_[s1]) : Weight::Zero(); 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w2 = (s2 < distance_.size()) && (s2 < fdistance_.size()) ? 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(distance_[s2], fdistance_[s2]) : Weight::Zero(); 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return less_(w1, w2); 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // If lenghts are equal, use reverse lexico. 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; si1 != si2; si1 = stack_.Pop(si1), si2 = stack_.Pop(si2)) { 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_.Top(si1) < stack_.Top(si2)) return true; 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_.Top(si1) > stack_.Top(si2)) return false; 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const StateTable &state_table_; 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Stack &stack_; 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<StackId> &stack_length_; 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight> &distance_; 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight> &fdistance_; 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NaturalLess<Weight> less_; 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson class ShortestStackFirstQueue 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public ShortestFirstQueue<StateId, StackCompare> { 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestStackFirstQueue( 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const PdtStateTable<StateId, StackId> &st, 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Stack &s, 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<StackId> &sl, 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight> &d, const vector<Weight> &fd) 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ShortestFirstQueue<StateId, StackCompare>( 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackCompare(st, s, sl, d, fd)) {} 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson }; 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void InitCloseParenMultimap(const vector<pair<Label, Label> > &parens); 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight DistanceToDest(StateId state, StateId source) const; 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson uint8 Flags(StateId s) const; 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFlags(StateId s, uint8 flags, uint8 mask); 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight Distance(StateId s) const; 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetDistance(StateId s, Weight w); 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight FinalDistance(StateId s) const; 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetFinalDistance(StateId s, Weight w); 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId SourceState(StateId s) const; 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void SetSourceState(StateId s, StateId p); 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void AddStateAndEnqueue(StateId s); 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Relax(StateId s, const A &arc, Weight w); 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool PruneArc(StateId s, const A &arc); 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ProcStart(); 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ProcFinal(StateId s); 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ProcNonParen(StateId s, const A &arc, bool add_arc); 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ProcOpenParen(StateId s, const A &arc, StackId si, StackId nsi); 461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ProcCloseParen(StateId s, const A &arc); 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void ProcDestStates(StateId s, StackId si); 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Fst<A> *ifst_; // Input PDT 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorFst<Arc> rfst_; // Reversed PDT 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_parentheses_; // Keep parentheses in ofst? 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateTable state_table_; // State table for efst_ 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Stack stack_; // Stack trie 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFst<Arc> efst_; // Expanded PDT 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StackId> stack_length_; // Length of stack for given stack id 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Weight> distance_; // Distance from initial state in efst_/ofst 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Weight> fdistance_; // Distance to final states in efst_/ofst 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestStackFirstQueue queue_; // Queue used to visit efst_ 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<uint8> flags_; // Status flags for states in efst_/ofst 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> sources_; // PDT source state for each expanded state 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef PdtShortestPath<Arc, FifoQueue<StateId> > SP; 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename SP::CloseParenMultimap ParenMultimap; 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SP *reverse_shortest_path_; // Shortest path for rfst_ 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtBalanceData<Arc> *balance_data_; // Not owned by shortest_path_ 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ParenMultimap close_paren_multimap_; // Maps open paren arcs to 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // balancing close paren arcs. 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst_; // Output fst 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight limit_; // Weight limit 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef unordered_map<StateId, Weight> DestMap; 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DestMap dest_map_; 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId current_stack_id_; 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'current_stack_id_' is the stack id of the states currently at the top 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // of queue, i.e., the states currently being popped and processed. 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'dest_map_' maps a state 's' in 'ifst_' that is the source 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // of a close parentheses matching the top of 'current_stack_id_; to 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the shortest-distance from '(s, current_stack_id_)' to the final 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // states in 'efst_'. 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t current_paren_id_; // Paren id at top of current stack 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t cached_stack_id_; 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId cached_source_; 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson slist<pair<StateId, Weight> > cached_dest_list_; 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'cached_dest_list_' contains the set of pair of destination 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // states and weight to final states for source state 502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'cached_source_' and paren id 'cached_paren_id': the set of 503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // source state of a close parenthesis with paren id 504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'cached_paren_id' balancing an incoming open parenthesis with 505f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // paren id 'cached_paren_id' in state 'cached_source_'. 506f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 507f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NaturalLess<Weight> less_; 508f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 509f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 510f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> const uint8 PrunedExpand<A>::kEnqueued = 0x01; 511f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> const uint8 PrunedExpand<A>::kExpanded = 0x02; 512f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> const uint8 PrunedExpand<A>::kSourceState = 0x04; 513f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 514f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 515f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Initializes close paren multimap, mapping pairs (s,paren_id) to 516f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// all the arcs out of s labeled with close parenthese for paren_id. 517f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 518f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::InitCloseParenMultimap( 519f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<Label, Label> > &parens) { 520f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, Label> paren_id_map; 521f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (Label i = 0; i < parens.size(); ++i) { 522f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const pair<Label, Label> &p = parens[i]; 523f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_id_map[p.first] = i; 524f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_id_map[p.second] = i; 525f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 526f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 527f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator<Fst<Arc> > siter(*ifst_); !siter.Done(); siter.Next()) { 528f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 529f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator<Fst<Arc> > aiter(*ifst_, s); 530f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); aiter.Next()) { 531f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc &arc = aiter.Value(); 532f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename unordered_map<Label, Label>::const_iterator pit 533f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson = paren_id_map.find(arc.ilabel); 534f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (pit == paren_id_map.end()) continue; 535f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arc.ilabel == parens[pit->second].second) { // Close paren 536f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ParenState<Arc> paren_state(pit->second, s); 537f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson close_paren_multimap_.insert(make_pair(paren_state, arc)); 538f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 539f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 540f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 541f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 542f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 543f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 544f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns the weight of the shortest balanced path from 'source' to 'dest' 545f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// in 'ifst_', 'dest' must be the source state of a close paren arc. 546f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 547f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight PrunedExpand<A>::DistanceToDest(StateId source, 548f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId dest) const { 549f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename SP::SearchState s(source + 1, dest + 1); 550f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "D(" << source << ", " << dest << ") =" 551f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << reverse_shortest_path_->GetShortestPathData().Distance(s); 552f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return reverse_shortest_path_->GetShortestPathData().Distance(s); 553f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 554f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 555f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns the flags for state 's' in 'ofst_'. 556f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 557f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonuint8 PrunedExpand<A>::Flags(StateId s) const { 558f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s < flags_.size() ? flags_[s] : 0; 559f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 560f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 561f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Modifies the flags for state 's' in 'ofst_'. 562f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 563f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::SetFlags(StateId s, uint8 flags, uint8 mask) { 564f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (flags_.size() <= s) flags_.push_back(0); 565f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_[s] &= ~mask; 566f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_[s] |= flags & mask; 567f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 568f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 569f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 570f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns the shortest distance from the initial state to 's' in 'ofst_'. 571f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 572f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight PrunedExpand<A>::Distance(StateId s) const { 573f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s < distance_.size() ? distance_[s] : Weight::Zero(); 574f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 575f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 576f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Sets the shortest distance from the initial state to 's' in 'ofst_' to 'w'. 577f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 578f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::SetDistance(StateId s, Weight w) { 579f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (distance_.size() <= s ) distance_.push_back(Weight::Zero()); 580f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance_[s] = w; 581f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 582f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 583f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 584f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns the shortest distance from 's' to the final states in 'ofst_'. 585f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 586f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::Weight PrunedExpand<A>::FinalDistance(StateId s) const { 587f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s < fdistance_.size() ? fdistance_[s] : Weight::Zero(); 588f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 589f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 590f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Sets the shortest distance from 's' to the final states in 'ofst_' to 'w'. 591f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 592f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::SetFinalDistance(StateId s, Weight w) { 593f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (fdistance_.size() <= s) fdistance_.push_back(Weight::Zero()); 594f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fdistance_[s] = w; 595f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 596f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 597f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns the PDT "source" state of state 's' in 'ofst_'. 598f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 599f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontypename A::StateId PrunedExpand<A>::SourceState(StateId s) const { 600f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return s < sources_.size() ? sources_[s] : kNoStateId; 601f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 602f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 603f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Sets the PDT "source" state of state 's' in 'ofst_' to state 'p' in 'ifst_'. 604f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 605f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::SetSourceState(StateId s, StateId p) { 606f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (sources_.size() <= s) sources_.push_back(kNoStateId); 607f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sources_[s] = p; 608f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 609f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 610f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Adds state 's' of 'efst_' to 'ofst_' and inserts it in the queue, 611f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// modifying the flags for 's' accordingly. 612f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 613f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::AddStateAndEnqueue(StateId s) { 614f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(Flags(s) & (kEnqueued | kExpanded))) { 615f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (ofst_->NumStates() <= s) ofst_->AddState(); 616f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson queue_.Enqueue(s); 617f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFlags(s, kEnqueued, kEnqueued); 618f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (Flags(s) & kEnqueued) { 619f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson queue_.Update(s); 620f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 621f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): Check everything is fine when kExpanded? 622f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 623f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 624f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Relaxes arc 'arc' out of state 's' in 'ofst_': 625f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// * if the distance to 's' times the weight of 'arc' is smaller than 626f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the currently stored distance for 'arc.nextstate', 627f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// updates 'Distance(arc.nextstate)' with new estimate; 628f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// * if 'fd' is less than the currently stored distance from 'arc.nextstate' 629f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// to the final state, updates with new estimate. 630f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 631f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::Relax(StateId s, const A &arc, Weight fd) { 632f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight nd = Times(Distance(s), arc.weight); 633f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (less_(nd, Distance(arc.nextstate))) { 634f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetDistance(arc.nextstate, nd); 635f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetSourceState(arc.nextstate, SourceState(s)); 636f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 637f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (less_(fd, FinalDistance(arc.nextstate))) 638f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinalDistance(arc.nextstate, fd); 639f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Relax: " << s << ", d[s] = " << Distance(s) << ", to " 640f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << arc.nextstate << ", d[ns] = " << Distance(arc.nextstate) 641f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", nd = " << nd; 642f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 643f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 644f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns 'true' if the arc 'arc' out of state 's' in 'efst_' needs to 645f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// be pruned. 646f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 647f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool PrunedExpand<A>::PruneArc(StateId s, const A &arc) { 648f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Prune ?"; 649f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight fd = Weight::Zero(); 650f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 651f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((cached_source_ != SourceState(s)) || 652f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (cached_stack_id_ != current_stack_id_)) { 653f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_source_ = SourceState(s); 654f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_stack_id_ = current_stack_id_; 655f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_dest_list_.clear(); 656f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (cached_source_ != ifst_->Start()) { 657f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (SetIterator set_iter = 658f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson balance_data_->Find(current_paren_id_, cached_source_); 659f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !set_iter.Done(); set_iter.Next()) { 660f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId dest = set_iter.Element(); 661f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename DestMap::const_iterator iter = dest_map_.find(dest); 662f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_dest_list_.push_front(*iter); 663f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 664f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 665f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): queue discipline should prevent this never 666f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // from happening; replace by a check. 667f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_dest_list_.push_front( 668f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson make_pair(rfst_.Start() -1, Weight::One())); 669f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 670f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 671f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 672f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename slist<pair<StateId, Weight> >::const_iterator iter = 673f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_dest_list_.begin(); 674f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter != cached_dest_list_.end(); 675f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++iter) { 676f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fd = Plus(fd, 677f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(DistanceToDest(state_table_.Tuple(arc.nextstate).state_id, 678f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter->first), 679f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter->second)); 680f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 681f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Relax(s, arc, fd); 682f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(Distance(s), Times(arc.weight, fd)); 683f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return less_(limit_, w); 684f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 685f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 686f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Adds start state of 'efst_' to 'ofst_', enqueues it and initializes 687f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the distance data structures. 688f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 689f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::ProcStart() { 690f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = efst_.Start(); 691f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddStateAndEnqueue(s); 692f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->SetStart(s); 693f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetSourceState(s, ifst_->Start()); 694f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 695f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_stack_id_ = 0; 696f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_paren_id_ = -1; 697f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stack_length_.push_back(0); 698f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dest_map_[rfst_.Start() - 1] = Weight::One(); // not needed 699f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 700f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_source_ = ifst_->Start(); 701f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_stack_id_ = 0; 702f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cached_dest_list_.push_front( 703f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson make_pair(rfst_.Start() -1, Weight::One())); 704f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 705f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTuple<StateId, StackId> tuple(rfst_.Start() - 1, 0); 706f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinalDistance(state_table_.FindState(tuple), Weight::One()); 707f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetDistance(s, Weight::One()); 708f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinalDistance(s, DistanceToDest(ifst_->Start(), rfst_.Start() - 1)); 709f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << DistanceToDest(ifst_->Start(), rfst_.Start() - 1); 710f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 711f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 712f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Makes 's' final in 'ofst_' if shortest accepting path ending in 's' 713f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// is below threshold. 714f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 715f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::ProcFinal(StateId s) { 716f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight final = efst_.Final(s); 717f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((final == Weight::Zero()) || less_(limit_, Times(Distance(s), final))) 718f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 719f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->SetFinal(s, final); 720f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 721f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 722f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns true when arc (or meta-arc) 'arc' out of 's' in 'efst_' is 723f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// below the threshold. When 'add_arc' is true, 'arc' is added to 'ofst_'. 724f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 725f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool PrunedExpand<A>::ProcNonParen(StateId s, const A &arc, bool add_arc) { 726f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "ProcNonParen: " << s << " to " << arc.nextstate 727f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", " << arc.ilabel << ":" << arc.olabel << " / " << arc.weight 728f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", add_arc = " << (add_arc ? "true" : "false"); 729f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (PruneArc(s, arc)) return false; 730f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(add_arc) ofst_->AddArc(s, arc); 731f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddStateAndEnqueue(arc.nextstate); 732f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 733f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 734f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 735f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Processes an open paren arc 'arc' out of state 's' in 'ofst_'. 736f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// When 'arc' is labeled with an open paren, 737f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 1. considers each (shortest) balanced path starting in 's' by 738f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// taking 'arc' and ending by a close paren balancing the open 739f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paren of 'arc' as a meta-arc, processes and prunes each meta-arc 740f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// as a non-paren arc, inserting its destination to the queue; 741f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 2. if at least one of these meta-arcs has not been pruned, 742f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// adds the destination of 'arc' to 'ofst_' as a new source state 743f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// for the stack id 'nsi' and inserts it in the queue. 744f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 745f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool PrunedExpand<A>::ProcOpenParen(StateId s, const A &arc, StackId si, 746f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId nsi) { 747f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Update the stack lenght when needed: |nsi| = |si| + 1. 748f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (stack_length_.size() <= nsi) stack_length_.push_back(-1); 749f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_length_[nsi] == -1) 750f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stack_length_[nsi] = stack_length_[si] + 1; 751f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 752f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId ns = arc.nextstate; 753f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Open paren: " << s << "(" << state_table_.Tuple(s).state_id 754f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ") to " << ns << "(" << state_table_.Tuple(ns).state_id << ")"; 755f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool proc_arc = false; 756f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight fd = Weight::Zero(); 757f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t paren_id = stack_.ParenId(arc.ilabel); 758f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson slist<StateId> sources; 759f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (SetIterator set_iter = 760f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson balance_data_->Find(paren_id, state_table_.Tuple(ns).state_id); 761f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !set_iter.Done(); set_iter.Next()) { 762f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sources.push_front(set_iter.Element()); 763f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 764f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename slist<StateId>::const_iterator sources_iter = sources.begin(); 765f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sources_iter != sources.end(); 766f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++ sources_iter) { 767f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId source = *sources_iter; 768f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Close paren source: " << source; 769f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ParenState<Arc> paren_state(paren_id, source); 770f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename ParenMultimap::const_iterator iter = 771f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson close_paren_multimap_.find(paren_state); 772f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter != close_paren_multimap_.end() && paren_state == iter->first; 773f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++iter) { 774f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc meta_arc = iter->second; 775f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTuple<StateId, StackId> tuple(meta_arc.nextstate, si); 776f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson meta_arc.nextstate = state_table_.FindState(tuple); 777f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << state_table_.Tuple(ns).state_id << ", " << source; 778f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Meta arc weight = " << arc.weight << " Times " 779f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << DistanceToDest(state_table_.Tuple(ns).state_id, source) 780f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " Times " << meta_arc.weight; 781f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson meta_arc.weight = Times( 782f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.weight, 783f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(DistanceToDest(state_table_.Tuple(ns).state_id, source), 784f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson meta_arc.weight)); 785f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson proc_arc |= ProcNonParen(s, meta_arc, false); 786f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fd = Plus(fd, Times( 787f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times( 788f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DistanceToDest(state_table_.Tuple(ns).state_id, source), 789f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter->second.weight), 790f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FinalDistance(meta_arc.nextstate))); 791f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 792f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 793f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (proc_arc) { 794f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "Proc open paren " << s << " to " << arc.nextstate; 795f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->AddArc( 796f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s, keep_parentheses_ ? arc : Arc(0, 0, arc.weight, arc.nextstate)); 797f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AddStateAndEnqueue(arc.nextstate); 798f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight nd = Times(Distance(s), arc.weight); 799f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(less_(nd, Distance(arc.nextstate))) 800f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetDistance(arc.nextstate, nd); 801f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // FinalDistance not necessary for source state since pruning 802f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // decided using the meta-arcs above. But this is a problem with 803f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // A*, hence: 804f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (less_(fd, FinalDistance(arc.nextstate))) 805f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFinalDistance(arc.nextstate, fd); 806f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFlags(arc.nextstate, kSourceState, kSourceState); 807f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 808f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return proc_arc; 809f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 810f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 811f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Checks that shortest path through close paren arc in 'efst_' is 812f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// below threshold, if so adds it to 'ofst_'. 813f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 814f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool PrunedExpand<A>::ProcCloseParen(StateId s, const A &arc) { 815f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(Distance(s), 816f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(arc.weight, FinalDistance(arc.nextstate))); 817f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (less_(limit_, w)) 818f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 819f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->AddArc( 820f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s, keep_parentheses_ ? arc : Arc(0, 0, arc.weight, arc.nextstate)); 821f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 822f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 823f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 824f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// When 's' in 'ofst_' is a source state for stack id 'si', identifies 825f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// all the corresponding possible destination states, that is, all the 826f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// states in 'ifst_' that have an outgoing close paren arc balancing 827f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the incoming open paren taken to get to 's', and for each such 828f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state 't', computes the shortest distance from (t, si) to the final 829f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// states in 'ofst_'. Stores this information in 'dest_map_'. 830f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 831f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::ProcDestStates(StateId s, StackId si) { 832f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!(Flags(s) & kSourceState)) return; 833f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (si != current_stack_id_) { 834f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dest_map_.clear(); 835f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_stack_id_ = si; 836f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson current_paren_id_ = stack_.Top(current_stack_id_); 837f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StackID " << si << " dequeued for first time"; 838f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 839f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // TODO(allauzen): clean up source state business; rename current function to 840f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ProcSourceState. 841f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetSourceState(s, state_table_.Tuple(s).state_id); 842f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 843f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ssize_t paren_id = stack_.Top(si); 844f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (SetIterator set_iter = 845f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson balance_data_->Find(paren_id, state_table_.Tuple(s).state_id); 846f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !set_iter.Done(); set_iter.Next()) { 847f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId dest_state = set_iter.Element(); 848f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (dest_map_.find(dest_state) != dest_map_.end()) 849f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson continue; 850f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight dest_weight = Weight::Zero(); 851f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ParenState<Arc> paren_state(paren_id, dest_state); 852f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (typename ParenMultimap::const_iterator iter = 853f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson close_paren_multimap_.find(paren_state); 854f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson iter != close_paren_multimap_.end() && paren_state == iter->first; 855f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++iter) { 856f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc &arc = iter->second; 857f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtStateTuple<StateId, StackId> tuple(arc.nextstate, stack_.Pop(si)); 858f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dest_weight = Plus(dest_weight, 859f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Times(arc.weight, 860f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FinalDistance(state_table_.FindState(tuple)))); 861f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 862f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson dest_map_[dest_state] = dest_weight; 863f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "State " << dest_state << " is a dest state for stack id " 864f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << si << " with weight " << dest_weight; 865f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 866f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 867f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 868f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expands and prunes with weight threshold 'threshold' the input PDT. 869f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Writes the result in 'ofst'. 870f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 871f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrunedExpand<A>::Expand( 872f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<A> *ofst, const typename A::Weight &threshold) { 873f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_ = ofst; 874f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->DeleteStates(); 875f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->SetInputSymbols(ifst_->InputSymbols()); 876f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst_->SetOutputSymbols(ifst_->OutputSymbols()); 877f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 878f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson limit_ = Times(DistanceToDest(ifst_->Start(), rfst_.Start() - 1), threshold); 879f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson flags_.clear(); 880f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 881f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcStart(); 882f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 883f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (!queue_.Empty()) { 884f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = queue_.Head(); 885f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson queue_.Dequeue(); 886f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SetFlags(s, kExpanded, kExpanded | kEnqueued); 887f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << s << " dequeued!"; 888f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 889f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcFinal(s); 890f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId stack_id = state_table_.Tuple(s).stack_id; 891f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcDestStates(s, stack_id); 892f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 893f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator<ExpandFst<Arc> > aiter(efst_, s); 894f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 895f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 896f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc = aiter.Value(); 897f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StackId nextstack_id = state_table_.Tuple(arc.nextstate).stack_id; 898f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (stack_id == nextstack_id) 899f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcNonParen(s, arc, true); 900f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else if (stack_id == stack_.Pop(nextstack_id)) 901f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcOpenParen(s, arc, stack_id, nextstack_id); 902f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else 903f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ProcCloseParen(s, arc); 904f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 905f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "d[" << s << "] = " << Distance(s) 906f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << ", fd[" << s << "] = " << FinalDistance(s); 907f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 908f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 909f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 910f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 911f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expand() Functions 912f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 913f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 914f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 915f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ExpandOptions { 916f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool connect; 917f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool keep_parentheses; 918f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Weight weight_threshold; 919f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 920f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandOptions(bool c = true, bool k = false, 921f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Weight w = Arc::Weight::Zero()) 922f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : connect(c), keep_parentheses(k), weight_threshold(w) {} 923f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 924f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 925f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expands a pushdown transducer (PDT) encoded as an FST into an FST. 926f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version writes the expanded PDT result to a MutableFst. 927f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDT, some transitions are labeled with open or close 928f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on 929f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path. The open-close parenthesis label pairs are passed in 930f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'parens'. The expansion enforces the parenthesis constraints. The 931f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// PDT must be expandable as an FST. 932f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 933f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Expand( 934f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc> &ifst, 935f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, typename Arc::Label> > &parens, 936f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 937f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ExpandOptions<Arc> &opts) { 938f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Label Label; 939f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 940f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 941f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename ExpandFst<Arc>::StackId StackId; 942f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 943f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ExpandFstOptions<Arc> eopts; 944f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson eopts.gc_limit = 0; 945f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.weight_threshold == Weight::Zero()) { 946f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson eopts.keep_parentheses = opts.keep_parentheses; 947f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *ofst = ExpandFst<Arc>(ifst, parens, eopts); 948f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 949f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PrunedExpand<Arc> pruned_expand(ifst, parens, opts.keep_parentheses); 950f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pruned_expand.Expand(ofst, opts.weight_threshold); 951f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 952f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 953f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.connect) 954f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 955f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 956f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 957f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Expands a pushdown transducer (PDT) encoded as an FST into an FST. 958f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// This version writes the expanded PDT result to a MutableFst. 959f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In the PDT, some transitions are labeled with open or close 960f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// parentheses. To be interpreted as a PDT, the parens must balance on 961f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// a path. The open-close parenthesis label pairs are passed in 962f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'parens'. The expansion enforces the parenthesis constraints. The 963f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// PDT must be expandable as an FST. 964f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 965f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Expand( 966f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Fst<Arc> &ifst, 967f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename Arc::Label, typename Arc::Label> > &parens, 968f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 969f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool connect = true, bool keep_parentheses = false) { 970f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Expand(ifst, parens, ofst, ExpandOptions<Arc>(connect, keep_parentheses)); 971f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 972f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 973f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 974f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 975f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_PDT_EXPAND_H__ 976