1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// shortest-path.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functions to find shortest paths in an FST. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_SHORTEST_PATH_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_SHORTEST_PATH_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <functional> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility> 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/determinize.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/queue.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/shortest-distance.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc, class Queue, class ArcFilter> 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ShortestPathOptions 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : public ShortestDistanceOptions<Arc, Queue, ArcFilter> { 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t nshortest; // return n-shortest paths 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool unique; // only return paths with distinct input strings 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool has_distance; // distance vector already contains the 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // shortest distance from the initial state 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool first_path; // Single shortest path stops after finding the first 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // path to a final state. That path is the shortest path 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // only when using the ShortestFirstQueue and 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // only when all the weights in the FST are between 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // One() and Zero() according to NaturalLess. 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight weight_threshold; // pruning weight threshold. 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId state_threshold; // pruning state threshold. 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false, 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool hasdist = false, float d = kDelta, 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool fp = false, Weight w = Weight::Zero(), 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = kNoStateId) 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d), 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nshortest(n), unique(u), has_distance(hasdist), first_path(fp), 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson weight_threshold(w), state_threshold(s) {} 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Shortest-path algorithm: normally not called directly; prefer 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ifst'. 'distance' returns the shortest distances from the source 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state to each state in 'ifst'. 'opts' is used to specify options 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// such as the queue discipline, the arc filter and delta. 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The shortest path is the lowest weight path w.r.t. the natural 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// semiring order. 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path (kPath) 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// property. 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class Queue, class ArcFilter> 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SingleShortestPath(const Fst<Arc> &ifst, 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<typename Arc::Weight> *distance, 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->DeleteStates(); 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetInputSymbols(ifst.InputSymbols()); 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetOutputSymbols(ifst.OutputSymbols()); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Start() == kNoStateId) { 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<bool> enqueued; 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> parent; 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Arc> arc_parent; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Queue *state_queue = opts.state_queue; 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source; 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight f_distance = Weight::Zero(); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId f_parent = kNoStateId; 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->clear(); 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_queue->Clear(); 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.nshortest != 1) { 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath" 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " instead"; 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.weight_threshold != Weight::Zero() || 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.state_threshold != kNoStateId) { 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "SingleShortestPath: weight and state thresholds not applicable"; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((Weight::Properties() & (kPath | kRightSemiring)) 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson != (kPath | kRightSemiring)) { 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "SingleShortestPath: Weight needs to have the path" 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << " property and be right distributive: " << Weight::Type(); 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (distance->size() < source) { 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->push_back(Weight::Zero()); 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enqueued.push_back(false); 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson parent.push_back(kNoStateId); 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId)); 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->push_back(Weight::One()); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson parent.push_back(kNoStateId); 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId)); 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_queue->Enqueue(source); 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enqueued.push_back(true); 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (!state_queue->Empty()) { 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = state_queue->Head(); 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_queue->Dequeue(); 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enqueued[s] = false; 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight sd = (*distance)[s]; 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Final(s) != Weight::Zero()) { 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(sd, ifst.Final(s)); 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (f_distance != Plus(f_distance, w)) { 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson f_distance = Plus(f_distance, w); 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson f_parent = s; 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!f_distance.Member()) { 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (opts.first_path) 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson break; 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<Arc> > aiter(ifst, s); 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Arc &arc = aiter.Value(); 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (distance->size() <= arc.nextstate) { 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->push_back(Weight::Zero()); 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enqueued.push_back(false); 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson parent.push_back(kNoStateId); 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kNoStateId)); 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight &nd = (*distance)[arc.nextstate]; 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(sd, arc.weight); 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (nd != Plus(nd, w)) { 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nd = Plus(nd, w); 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!nd.Member()) { 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson parent[arc.nextstate] = s; 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc_parent[arc.nextstate] = arc; 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!enqueued[arc.nextstate]) { 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_queue->Enqueue(arc.nextstate); 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enqueued[arc.nextstate] = true; 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_queue->Update(arc.nextstate); 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s_p = kNoStateId, d_p = kNoStateId; 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateId s = f_parent, d = kNoStateId; 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s != kNoStateId; 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson d = s, s = parent[s]) { 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson d_p = s_p; 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s_p = ofst->AddState(); 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (d == kNoStateId) { 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetFinal(s_p, ifst.Final(f_parent)); 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc_parent[d].nextstate = d_p; 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->AddArc(s_p, arc_parent[d]); 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetStart(s_p); 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties( 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathProperties(ofst->Properties(kFstProperties, false)), 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kFstProperties); 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class W> 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ShortestPathCompare { 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef S StateId; 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef W Weight; 212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef pair<StateId, Weight> Pair; 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathCompare(const vector<Pair>& pairs, 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight>& distance, 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId sfinal, float d) 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d) {} 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const StateId x, const StateId y) const { 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Pair &px = pairs_[x]; 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const Pair &py = pairs_[y]; 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight dx = px.first == superfinal_ ? Weight::One() : 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson px.first < distance_.size() ? distance_[px.first] : Weight::Zero(); 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight dy = py.first == superfinal_ ? Weight::One() : 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson py.first < distance_.size() ? distance_[py.first] : Weight::Zero(); 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight wx = Times(dx, px.second); 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight wy = Times(dy, py.second); 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Penalize complete paths to ensure correct results with inexact weights. 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // This forms a strict weak order so long as ApproxEqual(a, b) => 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // ApproxEqual(a, c) for all c s.t. less_(a, c) && less_(c, b). 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (px.first == superfinal_ && py.first != superfinal_) { 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return less_(wy, wx) || ApproxEqual(wx, wy, delta_); 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (py.first == superfinal_ && px.first != superfinal_) { 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return less_(wy, wx) && !ApproxEqual(wx, wy, delta_); 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return less_(wy, wx); 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Pair> &pairs_; 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<Weight> &distance_; 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId superfinal_; 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson float delta_; 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NaturalLess<Weight> less_; 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// N-Shortest-path algorithm: implements the core n-shortest path 250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// algorithm. The output is built REVERSED. See below for versions with 251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// more options and not reversed. 252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'. 254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'distance' must contain the shortest distance from each state to a final 255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state in 'ifst'. 'delta' is the convergence delta. 256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the 258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the 259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is 260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith shortest path. Disregarding the initial state and initial 261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions, the n-shortest paths, in fact, form a tree rooted at 262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the single final state. 263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be left and right distributive (kSemiring) and 265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// have the path (kPath) property. 266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm is from Mohri and Riley, "An Efficient Algorithm for 268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the n-best-strings problem", ICSLP 2002. The algorithm relies on 269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest-distance algorithm. There are some issues with the 270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pseudo-code as written in the paper (viz., line 11). 271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst and 273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and at any state in its expansion the values of distance vector need only 274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// be defined at that time for the states that are known to exist. 275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class RevArc> 276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid NShortestPath(const Fst<RevArc> &ifst, 277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson MutableFst<Arc> *ofst, 278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<typename Arc::Weight> &distance, 279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t n, 280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson float delta = kDelta, 281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Weight weight_threshold = Arc::Weight::Zero(), 282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::StateId state_threshold = kNoStateId) { 283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef pair<StateId, Weight> Pair; 286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename RevArc::Weight RevWeight; 287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (n <= 0) return; 289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { 290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "NShortestPath: Weight needs to have the " 291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "path property and be distributive: " 292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << Weight::Type(); 293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->DeleteStates(); 297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetInputSymbols(ifst.InputSymbols()); 298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetOutputSymbols(ifst.OutputSymbols()); 299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Each state in 'ofst' corresponds to a path with weight w from the 300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // initial state of 'ifst' to a state s in 'ifst', that can be 301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // characterized by a pair (s,w). The vector 'pairs' maps each 302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // state in 'ofst' to the corresponding pair maps states in OFST to 303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the corresponding pair (s,w). 304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Pair> pairs; 305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // The supefinal state is denoted by -1, 'compare' knows that the 306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // distance from 'superfinal' to the final state is 'Weight::One()', 307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // hence 'distance[superfinal]' is not needed. 308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId superfinal = -1; 309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathCompare<StateId, Weight> 310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson compare(pairs, distance, superfinal, delta); 311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<StateId> heap; 312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // 'r[s + 1]', 's' state in 'fst', is the number of states in 'ofst' 313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // which corresponding pair contains 's' ,i.e. , it is number of 314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // paths computed so far to 's'. Valid for 's == -1' (superfinal). 315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<int> r; 316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NaturalLess<Weight> less; 317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Start() == kNoStateId || 318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance.size() <= ifst.Start() || 319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance[ifst.Start()] == Weight::Zero() || 320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson less(weight_threshold, Weight::One()) || 321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_threshold == 0) { 322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); 323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetStart(ofst->AddState()); 326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId final = ofst->AddState(); 327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetFinal(final, Weight::One()); 328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (pairs.size() <= final) 329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs.push_back(Pair(kNoStateId, Weight::Zero())); 330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs[final] = Pair(ifst.Start(), Weight::One()); 331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap.push_back(final); 332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight limit = Times(distance[ifst.Start()], weight_threshold); 333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (!heap.empty()) { 335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pop_heap(heap.begin(), heap.end(), compare); 336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId state = heap.back(); 337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Pair p = pairs[state]; 338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap.pop_back(); 339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight d = p.first == superfinal ? Weight::One() : 340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson p.first < distance.size() ? distance[p.first] : Weight::Zero(); 341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (less(limit, Times(d, p.second)) || 343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson (state_threshold != kNoStateId && 344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->NumStates() >= state_threshold)) 345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson continue; 346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (r.size() <= p.first + 1) r.push_back(0); 348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++r[p.first + 1]; 349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (p.first == superfinal) 350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state)); 351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((p.first == superfinal) && (r[p.first + 1] == n)) break; 352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (r[p.first + 1] > n) continue; 353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (p.first == superfinal) continue; 354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first); 356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const RevArc &rarc = aiter.Value(); 359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate); 360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(p.second, arc.weight); 361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId next = ofst->AddState(); 362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs.push_back(Pair(arc.nextstate, w)); 363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson arc.nextstate = state; 364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->AddArc(next, arc); 365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap.push_back(next); 366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson push_heap(heap.begin(), heap.end(), compare); 367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight finalw = ifst.Final(p.first).Reverse(); 370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (finalw != Weight::Zero()) { 371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight w = Times(p.second, finalw); 372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId next = ofst->AddState(); 373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pairs.push_back(Pair(superfinal, w)); 374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->AddArc(next, Arc(0, 0, finalw, state)); 375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson heap.push_back(next); 376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson push_heap(heap.begin(), heap.end(), compare); 377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Connect(ofst); 380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); 381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties( 382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathProperties(ofst->Properties(kFstProperties, false)), 383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kFstProperties); 384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// N-Shortest-path algorithm: this version allow fine control 388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// via the options argument. See below for a simpler interface. 389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns 391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest distances from the source state to each state in 392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ifst'. 'opts' is used to specify options such as the number of 393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths to return, whether they need to have distinct input 394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// strings, the queue discipline, the arc filter and the convergence 395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delta. 396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the 398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the 399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is 400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith shortest path. Disregarding the initial state and initial 401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions, The n-shortest paths, in fact, form a tree rooted at 402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the single final state. 403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path (kPath) 405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// property. They need to be left distributive as well for nshortest 406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// > 1. 407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm is from Mohri and Riley, "An Efficient Algorithm for 409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the n-best-strings problem", ICSLP 2002. The algorithm relies on 410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest-distance algorithm. There are some issues with the 411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pseudo-code as written in the paper (viz., line 11). 412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class Queue, class ArcFilter> 413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, 414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<typename Arc::Weight> *distance, 415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { 416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::StateId StateId; 417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename Arc::Weight Weight; 418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef ReverseArc<Arc> ReverseArc; 419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t n = opts.nshortest; 421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (n == 1) { 422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SingleShortestPath(ifst, ofst, distance, opts); 423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (n <= 0) return; 426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { 427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the " 428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "path property and be distributive: " 429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << Weight::Type(); 430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!opts.has_distance) { 434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestDistance(ifst, distance, opts); 435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (distance->size() == 1 && !(*distance)[0].Member()) { 436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetProperties(kError, kError); 437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return; 438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Algorithm works on the reverse of 'fst' : 'rfst', 'distance' is 441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // the distance to the final state in 'rfst', 'ofst' is built as the 442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // reverse of the tree of n-shortest path in 'rfst'. 443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorFst<ReverseArc> rfst; 444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Reverse(ifst, &rfst); 445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Weight d = Weight::Zero(); 446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< VectorFst<ReverseArc> > aiter(rfst, 0); 447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); aiter.Next()) { 448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const ReverseArc &arc = aiter.Value(); 449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = arc.nextstate - 1; 450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s < distance->size()) 451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson d = Plus(d, Times(arc.weight.Reverse(), (*distance)[s])); 452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->insert(distance->begin(), d); 454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!opts.unique) { 456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NShortestPath(rfst, ofst, *distance, n, opts.delta, 457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.weight_threshold, opts.state_threshold); 458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Weight> ddistance; 460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DeterminizeFstOptions<ReverseArc> dopts(opts.delta); 4615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin DeterminizeFst<ReverseArc> dfst(rfst, distance, &ddistance, dopts); 462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson NShortestPath(dfst, ofst, ddistance, n, opts.delta, 463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson opts.weight_threshold, opts.state_threshold); 464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson distance->erase(distance->begin()); 466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Shortest-path algorithm: simplified interface. See above for a 470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version that allows finer control. 471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue 473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// discipline is automatically selected. When 'unique' == true, only 474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths with distinct input labels are returned. 475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the 477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the 478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is 479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith best path. 480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path 482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (kPath) property. 483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, 485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson size_t n = 1, bool unique = false, 486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool first_path = false, 487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Weight weight_threshold = Arc::Weight::Zero(), 488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::StateId state_threshold = kNoStateId) { 489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<typename Arc::Weight> distance; 490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AnyArcFilter<Arc> arc_filter; 491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AutoQueue<typename Arc::StateId> state_queue(ifst, &distance, arc_filter); 492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPathOptions< Arc, AutoQueue<typename Arc::StateId>, 493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson AnyArcFilter<Arc> > opts(&state_queue, arc_filter, n, unique, false, 494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson kDelta, first_path, weight_threshold, 495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson state_threshold); 496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ShortestPath(ifst, ofst, &distance, opts); 497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_SHORTEST_PATH_H__ 502