1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// shortest-path.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functions to find shortest paths in an FST.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_SHORTEST_PATH_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_SHORTEST_PATH_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <functional>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <utility>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::pair; using std::make_pair;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/cache.h>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/determinize.h>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/queue.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/shortest-distance.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc, class Queue, class ArcFilter>
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct ShortestPathOptions
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    : public ShortestDistanceOptions<Arc, Queue, ArcFilter> {
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t nshortest;   // return n-shortest paths
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool unique;        // only return paths with distinct input strings
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool has_distance;  // distance vector already contains the
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      // shortest distance from the initial state
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool first_path;    // Single shortest path stops after finding the first
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      // path to a final state. That path is the shortest path
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      // only when using the ShortestFirstQueue and
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      // only when all the weights in the FST are between
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      // One() and Zero() according to NaturalLess.
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight weight_threshold;   // pruning weight threshold.
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId state_threshold;   // pruning state threshold.
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false,
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      bool hasdist = false, float d = kDelta,
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      bool fp = false, Weight w = Weight::Zero(),
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      StateId s = kNoStateId)
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d),
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nshortest(n), unique(u), has_distance(hasdist), first_path(fp),
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        weight_threshold(w), state_threshold(s) {}
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Shortest-path algorithm: normally not called directly; prefer
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ifst'. 'distance' returns the shortest distances from the source
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state to each state in 'ifst'. 'opts' is used to specify options
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// such as the queue discipline, the arc filter and delta.
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The shortest path is the lowest weight path w.r.t. the natural
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// semiring order.
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path (kPath)
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// property.
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class Queue, class ArcFilter>
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid SingleShortestPath(const Fst<Arc> &ifst,
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  MutableFst<Arc> *ofst,
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  vector<typename Arc::Weight> *distance,
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->DeleteStates();
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetInputSymbols(ifst.InputSymbols());
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetOutputSymbols(ifst.OutputSymbols());
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (ifst.Start() == kNoStateId) {
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<bool> enqueued;
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> parent;
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Arc> arc_parent;
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Queue *state_queue = opts.state_queue;
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source;
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight f_distance = Weight::Zero();
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId f_parent = kNoStateId;
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  distance->clear();
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  state_queue->Clear();
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.nshortest != 1) {
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath"
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << " instead";
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ofst->SetProperties(kError, kError);
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (opts.weight_threshold != Weight::Zero() ||
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      opts.state_threshold != kNoStateId) {
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() <<
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        "SingleShortestPath: weight and state thresholds not applicable";
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ofst->SetProperties(kError, kError);
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if ((Weight::Properties() & (kPath | kRightSemiring))
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      != (kPath | kRightSemiring)) {
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "SingleShortestPath: Weight needs to have the path"
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << " property and be right distributive: " << Weight::Type();
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ofst->SetProperties(kError, kError);
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (distance->size() < source) {
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    distance->push_back(Weight::Zero());
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    enqueued.push_back(false);
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    parent.push_back(kNoStateId);
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  distance->push_back(Weight::One());
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  parent.push_back(kNoStateId);
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId));
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  state_queue->Enqueue(source);
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  enqueued.push_back(true);
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (!state_queue->Empty()) {
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = state_queue->Head();
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    state_queue->Dequeue();
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    enqueued[s] = false;
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight sd = (*distance)[s];
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ifst.Final(s) != Weight::Zero()) {
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight w = Times(sd, ifst.Final(s));
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (f_distance != Plus(f_distance, w)) {
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        f_distance = Plus(f_distance, w);
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        f_parent = s;
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!f_distance.Member()) {
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ofst->SetProperties(kError, kError);
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return;
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (opts.first_path)
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        break;
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<Arc> > aiter(ifst, s);
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Arc &arc = aiter.Value();
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      while (distance->size() <= arc.nextstate) {
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        distance->push_back(Weight::Zero());
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        enqueued.push_back(false);
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        parent.push_back(kNoStateId);
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(),
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                 kNoStateId));
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight &nd = (*distance)[arc.nextstate];
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight w = Times(sd, arc.weight);
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (nd != Plus(nd, w)) {
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        nd = Plus(nd, w);
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!nd.Member()) {
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ofst->SetProperties(kError, kError);
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return;
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        parent[arc.nextstate] = s;
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        arc_parent[arc.nextstate] = arc;
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!enqueued[arc.nextstate]) {
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          state_queue->Enqueue(arc.nextstate);
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          enqueued[arc.nextstate] = true;
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          state_queue->Update(arc.nextstate);
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId s_p = kNoStateId, d_p = kNoStateId;
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateId s = f_parent, d = kNoStateId;
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       s != kNoStateId;
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       d = s, s = parent[s]) {
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    d_p = s_p;
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    s_p = ofst->AddState();
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (d == kNoStateId) {
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->SetFinal(s_p, ifst.Final(f_parent));
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      arc_parent[d].nextstate = d_p;
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->AddArc(s_p, arc_parent[d]);
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetStart(s_p);
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetProperties(
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ShortestPathProperties(ofst->Properties(kFstProperties, false)),
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      kFstProperties);
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class S, class W>
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ShortestPathCompare {
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef S StateId;
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef W Weight;
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef pair<StateId, Weight> Pair;
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ShortestPathCompare(const vector<Pair>& pairs,
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      const vector<Weight>& distance,
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      StateId sfinal, float d)
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d)  {}
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator()(const StateId x, const StateId y) const {
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const Pair &px = pairs_[x];
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const Pair &py = pairs_[y];
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight dx = px.first == superfinal_ ? Weight::One() :
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        px.first < distance_.size() ? distance_[px.first] : Weight::Zero();
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight dy = py.first == superfinal_ ? Weight::One() :
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        py.first < distance_.size() ? distance_[py.first] : Weight::Zero();
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight wx = Times(dx, px.second);
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight wy = Times(dy, py.second);
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // Penalize complete paths to ensure correct results with inexact weights.
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // This forms a strict weak order so long as ApproxEqual(a, b) =>
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    // ApproxEqual(a, c) for all c s.t. less_(a, c) && less_(c, b).
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (px.first == superfinal_ && py.first != superfinal_) {
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return less_(wy, wx) || ApproxEqual(wx, wy, delta_);
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (py.first == superfinal_ && px.first != superfinal_) {
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return less_(wy, wx) && !ApproxEqual(wx, wy, delta_);
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return less_(wy, wx);
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Pair> &pairs_;
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<Weight> &distance_;
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId superfinal_;
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  float delta_;
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  NaturalLess<Weight> less_;
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// N-Shortest-path algorithm: implements the core n-shortest path
250f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// algorithm. The output is built REVERSED. See below for versions with
251f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// more options and not reversed.
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'.
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'distance' must contain the shortest distance from each state to a final
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// state in 'ifst'. 'delta' is the convergence delta.
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the
259f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is
260f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith shortest path. Disregarding the initial state and initial
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions, the n-shortest paths, in fact, form a tree rooted at
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the single final state.
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be left and right distributive (kSemiring) and
265f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// have the path (kPath) property.
266f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the n-best-strings problem", ICSLP 2002. The algorithm relies on
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest-distance algorithm. There are some issues with the
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pseudo-code as written in the paper (viz., line 11).
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst and
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// and at any state in its expansion the values of distance vector need only
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// be defined at that time for the states that are known to exist.
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class RevArc>
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid NShortestPath(const Fst<RevArc> &ifst,
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   MutableFst<Arc> *ofst,
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   const vector<typename Arc::Weight> &distance,
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   size_t n,
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   float delta = kDelta,
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   typename Arc::StateId state_threshold = kNoStateId) {
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef pair<StateId, Weight> Pair;
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename RevArc::Weight RevWeight;
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (n <= 0) return;
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) {
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "NShortestPath: Weight needs to have the "
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << "path property and be distributive: "
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << Weight::Type();
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ofst->SetProperties(kError, kError);
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->DeleteStates();
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetInputSymbols(ifst.InputSymbols());
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetOutputSymbols(ifst.OutputSymbols());
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Each state in 'ofst' corresponds to a path with weight w from the
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // initial state of 'ifst' to a state s in 'ifst', that can be
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // characterized by a pair (s,w).  The vector 'pairs' maps each
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // state in 'ofst' to the corresponding pair maps states in OFST to
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // the corresponding pair (s,w).
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Pair> pairs;
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // The supefinal state is denoted by -1, 'compare' knows that the
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // distance from 'superfinal' to the final state is 'Weight::One()',
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // hence 'distance[superfinal]' is not needed.
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId superfinal = -1;
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ShortestPathCompare<StateId, Weight>
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    compare(pairs, distance, superfinal, delta);
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> heap;
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // 'r[s + 1]', 's' state in 'fst', is the number of states in 'ofst'
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // which corresponding pair contains 's' ,i.e. , it is number of
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // paths computed so far to 's'. Valid for 's == -1' (superfinal).
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<int> r;
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  NaturalLess<Weight> less;
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (ifst.Start() == kNoStateId ||
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      distance.size() <= ifst.Start() ||
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      distance[ifst.Start()] == Weight::Zero() ||
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      less(weight_threshold, Weight::One()) ||
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      state_threshold == 0) {
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetStart(ofst->AddState());
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId final = ofst->AddState();
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetFinal(final, Weight::One());
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (pairs.size() <= final)
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    pairs.push_back(Pair(kNoStateId, Weight::Zero()));
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  pairs[final] = Pair(ifst.Start(), Weight::One());
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  heap.push_back(final);
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight limit = Times(distance[ifst.Start()], weight_threshold);
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  while (!heap.empty()) {
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    pop_heap(heap.begin(), heap.end(), compare);
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId state = heap.back();
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Pair p = pairs[state];
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    heap.pop_back();
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight d = p.first == superfinal ? Weight::One() :
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        p.first < distance.size() ? distance[p.first] : Weight::Zero();
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (less(limit, Times(d, p.second)) ||
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (state_threshold != kNoStateId &&
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         ofst->NumStates() >= state_threshold))
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      continue;
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (r.size() <= p.first + 1) r.push_back(0);
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++r[p.first + 1];
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (p.first == superfinal)
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state));
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if ((p.first == superfinal) && (r[p.first + 1] == n)) break;
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (r[p.first + 1] > n) continue;
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (p.first == superfinal) continue;
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first);
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const RevArc &rarc = aiter.Value();
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate);
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight w = Times(p.second, arc.weight);
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId next = ofst->AddState();
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      pairs.push_back(Pair(arc.nextstate, w));
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      arc.nextstate = state;
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->AddArc(next, arc);
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      heap.push_back(next);
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      push_heap(heap.begin(), heap.end(), compare);
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Weight finalw = ifst.Final(p.first).Reverse();
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (finalw != Weight::Zero()) {
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Weight w = Times(p.second, finalw);
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId next = ofst->AddState();
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      pairs.push_back(Pair(superfinal, w));
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->AddArc(next, Arc(0, 0, finalw, state));
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      heap.push_back(next);
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      push_heap(heap.begin(), heap.end(), compare);
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Connect(ofst);
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError);
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetProperties(
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ShortestPathProperties(ofst->Properties(kFstProperties, false)),
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      kFstProperties);
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// N-Shortest-path algorithm:  this version allow fine control
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// via the options argument. See below for a simpler interface.
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest distances from the source state to each state in
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ifst'. 'opts' is used to specify options such as the number of
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths to return, whether they need to have distinct input
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// strings, the queue discipline, the arc filter and the convergence
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delta.
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith shortest path. Disregarding the initial state and initial
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// transitions, The n-shortest paths, in fact, form a tree rooted at
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the single final state.
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path (kPath)
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// property. They need to be left distributive as well for nshortest
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// > 1.
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The algorithm is from Mohri and Riley, "An Efficient Algorithm for
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the n-best-strings problem", ICSLP 2002. The algorithm relies on
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the shortest-distance algorithm. There are some issues with the
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// pseudo-code as written in the paper (viz., line 11).
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc, class Queue, class ArcFilter>
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  vector<typename Arc::Weight> *distance,
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  ShortestPathOptions<Arc, Queue, ArcFilter> &opts) {
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef ReverseArc<Arc> ReverseArc;
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  size_t n = opts.nshortest;
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (n == 1) {
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    SingleShortestPath(ifst, ofst, distance, opts);
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (n <= 0) return;
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) {
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the "
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "path property and be distributive: "
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << Weight::Type();
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ofst->SetProperties(kError, kError);
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return;
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!opts.has_distance) {
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ShortestDistance(ifst, distance, opts);
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (distance->size() == 1 && !(*distance)[0].Member()) {
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ofst->SetProperties(kError, kError);
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return;
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Algorithm works on the reverse of 'fst' : 'rfst', 'distance' is
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // the distance to the final state in 'rfst', 'ofst' is built as the
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // reverse of the tree of n-shortest path in 'rfst'.
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorFst<ReverseArc> rfst;
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Reverse(ifst, &rfst);
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Weight d = Weight::Zero();
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (ArcIterator< VectorFst<ReverseArc> > aiter(rfst, 0);
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !aiter.Done(); aiter.Next()) {
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const ReverseArc &arc = aiter.Value();
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = arc.nextstate - 1;
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s < distance->size())
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      d = Plus(d, Times(arc.weight.Reverse(), (*distance)[s]));
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  distance->insert(distance->begin(), d);
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!opts.unique) {
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    NShortestPath(rfst, ofst, *distance, n, opts.delta,
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  opts.weight_threshold, opts.state_threshold);
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<Weight> ddistance;
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    DeterminizeFstOptions<ReverseArc> dopts(opts.delta);
4615b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    DeterminizeFst<ReverseArc> dfst(rfst, distance, &ddistance, dopts);
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    NShortestPath(dfst, ofst, ddistance, n, opts.delta,
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  opts.weight_threshold, opts.state_threshold);
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  distance->erase(distance->begin());
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Shortest-path algorithm: simplified interface. See above for a
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// version that allows finer control.
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// discipline is automatically selected. When 'unique' == true, only
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paths with distinct input labels are returned.
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The n-shortest paths are the n-lowest weight paths w.r.t. the
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// natural semiring order. The single path that can be read from the
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ith of at most n transitions leaving the initial state of 'ofst' is
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// the ith best path.
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The weights need to be right distributive and have the path
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (kPath) property.
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  size_t n = 1, bool unique = false,
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  bool first_path = false,
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  typename Arc::Weight weight_threshold = Arc::Weight::Zero(),
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  typename Arc::StateId state_threshold = kNoStateId) {
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<typename Arc::Weight> distance;
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  AnyArcFilter<Arc> arc_filter;
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  AutoQueue<typename Arc::StateId> state_queue(ifst, &distance, arc_filter);
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ShortestPathOptions< Arc, AutoQueue<typename Arc::StateId>,
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      AnyArcFilter<Arc> > opts(&state_queue, arc_filter, n, unique, false,
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               kDelta, first_path, weight_threshold,
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               state_threshold);
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ShortestPath(ifst, ofst, &distance, opts);
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_SHORTEST_PATH_H__
502