1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// paren.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Common classes for PDT parentheses
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_PAREN_H_
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_PAREN_H_
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <algorithm>
263da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_map>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
293da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_set>
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set;
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset;
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <set>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/pdt.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/collection.h>
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/dfs-visit.h>
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// ParenState: Pair of an open (close) parenthesis and
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// its destination (source) state.
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass ParenState {
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  struct Hash {
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t operator()(const ParenState<A> &p) const {
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return p.paren_id + p.state_id * kPrime;
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  };
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label paren_id;     // ID of open (close) paren
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId state_id;   // destination (source) state of open (close) paren
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenState() : paren_id(kNoLabel), state_id(kNoStateId) {}
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenState(Label p, StateId s) : paren_id(p), state_id(s) {}
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator==(const ParenState<A> &p) const {
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (&p == this)
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return true;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return p.paren_id == this->paren_id && p.state_id == this->state_id;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator!=(const ParenState<A> &p) const { return !(p == *this); }
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator<(const ParenState<A> &p) const {
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return paren_id < this->paren.id ||
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        (p.paren_id == this->paren.id && p.state_id < this->state_id);
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  static const size_t kPrime;
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst size_t ParenState<A>::kPrime = 7853;
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Creates an FST-style iterator from STL map and iterator.
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class M>
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass MapIterator {
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename M::const_iterator StlIterator;
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename M::value_type PairType;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename PairType::second_type ValueType;
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  MapIterator(const M &m, StlIterator iter)
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : map_(m), begin_(iter), iter_(iter) {}
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool Done() const {
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return iter_ == map_.end() || iter_->first != begin_->first;
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ValueType Value() const { return iter_->second; }
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Next() { ++iter_; }
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Reset() { iter_ = begin_; }
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const M &map_;
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StlIterator begin_;
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StlIterator iter_;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// PdtParenReachable: Provides various parenthesis reachability information
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// on a PDT.
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass PdtParenReachable {
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from state ID to reachable paren IDs from (to) that state.
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_multimap<StateId, Label> ParenMultiMap;
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from paren ID and state ID to reachable state set ID
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_map<ParenState<A>, ssize_t,
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   typename ParenState<A>::Hash> StateSetMap;
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from paren ID and state ID to arcs exiting that state with that
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Label.
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_multimap<ParenState<A>, A,
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                        typename ParenState<A>::Hash> ParenArcMultiMap;
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef MapIterator<ParenMultiMap> ParenIterator;
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef MapIterator<ParenArcMultiMap> ParenArcIterator;
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Computes close (open) parenthesis reachabilty information for
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // a PDT with bounded stack.
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PdtParenReachable(const Fst<A> &fst,
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                    const vector<pair<Label, Label> > &parens, bool close)
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : fst_(fst),
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        parens_(parens),
147dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        close_(close),
148dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        error_(false) {
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (Label i = 0; i < parens.size(); ++i) {
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const pair<Label, Label>  &p = parens[i];
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      paren_id_map_[p.first] = i;
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      paren_id_map_[p.second] = i;
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (close_) {
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId start = fst.Start();
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (start == kNoStateId)
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return;
159dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if (!DFSearch(start)) {
160dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        FSTERROR() << "PdtReachable: Underlying cyclicity not supported";
161dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        error_ = true;
162dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      }
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "PdtParenReachable: open paren info not implemented";
165dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      error_ = true;
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
169dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool const Error() { return error_; }
170dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Given a state ID, returns an iterator over paren IDs
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // for close (open) parens reachable from that state along balanced
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // paths.
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenIterator FindParens(StateId s) const {
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ParenIterator(paren_multimap_, paren_multimap_.find(s));
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Given a paren ID and a state ID s, returns an iterator over
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // states that can be reached along balanced paths from (to) s that
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // have have close (open) parentheses matching the paren ID exiting
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (entering) those states.
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SetIterator FindStates(Label paren_id, StateId s) const {
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> paren_state(paren_id, s);
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename StateSetMap::const_iterator id_it = set_map_.find(paren_state);
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (id_it == set_map_.end()) {
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return state_sets_.FindSet(-1);
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return state_sets_.FindSet(id_it->second);
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Given a paren Id and a state ID s, return an iterator over
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // arcs that exit (enter) s and are labeled with a close (open)
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // parenthesis matching the paren ID.
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenArcIterator FindParenArcs(Label paren_id, StateId s) const {
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> paren_state(paren_id, s);
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return ParenArcIterator(paren_arc_multimap_,
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                            paren_arc_multimap_.find(paren_state));
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // DFS that gathers paren and state set information.
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Bool returns false when cycle detected.
204dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool DFSearch(StateId s);
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Unions state sets together gathered by the DFS.
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void ComputeStateSet(StateId s);
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Gather state set(s) from state 'nexts'.
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void UpdateStateSet(StateId nexts, set<Label> *paren_set,
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      vector< set<StateId> > *state_sets) const;
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const Fst<A> &fst_;
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const vector<pair<Label, Label> > &parens_;         // Paren ID -> Labels
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool close_;                                        // Close/open paren info?
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<Label, Label> paren_id_map_;               // Paren labels -> ID
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenMultiMap paren_multimap_;                      // Paren reachability
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenArcMultiMap paren_arc_multimap_;               // Paren Arcs
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<char> state_color_;                          // DFS state
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable Collection<ssize_t, StateId> state_sets_;   // Reachable states -> ID
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateSetMap set_map_;                               // ID -> Reachable states
222dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin  bool error_;
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(PdtParenReachable);
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// DFS that gathers paren and state set information.
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
228dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkinbool PdtParenReachable<A>::DFSearch(StateId s) {
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (s >= state_color_.size())
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    state_color_.resize(s + 1, kDfsWhite);
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (state_color_[s] == kDfsBlack)
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (state_color_[s] == kDfsGrey)
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  state_color_[s] = kDfsGrey;
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (ArcIterator<Fst<A> > aiter(fst_, s);
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !aiter.Done();
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       aiter.Next()) {
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const A &arc = aiter.Value();
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename unordered_map<Label, Label>::const_iterator pit
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        = paren_id_map_.find(arc.ilabel);
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (pit != paren_id_map_.end()) {               // paren?
248f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label paren_id = pit->second;
249f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arc.ilabel == parens_[paren_id].first) {  // open paren
250dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        if (!DFSearch(arc.nextstate))
251dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin          return false;
252f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        for (SetIterator set_iter = FindStates(paren_id, arc.nextstate);
253f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             !set_iter.Done(); set_iter.Next()) {
254f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          for (ParenArcIterator paren_arc_iter =
255f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   FindParenArcs(paren_id, set_iter.Element());
256f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               !paren_arc_iter.Done();
257f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               paren_arc_iter.Next()) {
258f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            const A &cparc = paren_arc_iter.Value();
259dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            if (!DFSearch(cparc.nextstate))
260dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin              return false;
261f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
262f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
263f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
264f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {                                       // non-paren
265dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin      if(!DFSearch(arc.nextstate))
266dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin        return false;
267f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
268f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
269f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ComputeStateSet(s);
270f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  state_color_[s] = kDfsBlack;
271f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
272f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
273f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
274f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unions state sets together gathered by the DFS.
275f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
276f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PdtParenReachable<A>::ComputeStateSet(StateId s) {
277f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  set<Label> paren_set;
278f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector< set<StateId> > state_sets(parens_.size());
279f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (ArcIterator< Fst<A> > aiter(fst_, s);
280f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !aiter.Done();
281f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       aiter.Next()) {
282f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const A &arc = aiter.Value();
283f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
284f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename unordered_map<Label, Label>::const_iterator pit
285f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        = paren_id_map_.find(arc.ilabel);
286f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (pit != paren_id_map_.end()) {               // paren?
287f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label paren_id = pit->second;
288f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (arc.ilabel == parens_[paren_id].first) {  // open paren
289f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        for (SetIterator set_iter =
290f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 FindStates(paren_id, arc.nextstate);
291f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             !set_iter.Done(); set_iter.Next()) {
292f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          for (ParenArcIterator paren_arc_iter =
293f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   FindParenArcs(paren_id, set_iter.Element());
294f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               !paren_arc_iter.Done();
295f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               paren_arc_iter.Next()) {
296f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            const A &cparc = paren_arc_iter.Value();
297f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            UpdateStateSet(cparc.nextstate, &paren_set, &state_sets);
298f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
299f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
300f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else {                                      // close paren
301f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        paren_set.insert(paren_id);
302f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        state_sets[paren_id].insert(s);
303f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        ParenState<A> paren_state(paren_id, s);
304f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        paren_arc_multimap_.insert(make_pair(paren_state, arc));
305f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
306f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {                                        // non-paren
307f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      UpdateStateSet(arc.nextstate, &paren_set, &state_sets);
308f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
309f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
310f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
311f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<StateId> state_set;
312f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (typename set<Label>::iterator paren_iter = paren_set.begin();
313f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       paren_iter != paren_set.end(); ++paren_iter) {
314f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    state_set.clear();
315f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Label paren_id = *paren_iter;
316f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    paren_multimap_.insert(make_pair(s, paren_id));
317f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename set<StateId>::iterator state_iter
318f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             = state_sets[paren_id].begin();
319f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         state_iter != state_sets[paren_id].end();
320f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         ++state_iter) {
321f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      state_set.push_back(*state_iter);
322f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
323f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> paren_state(paren_id, s);
324f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    set_map_[paren_state] = state_sets_.FindId(state_set);
325f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
326f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
327f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
328f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Gather state set(s) from state 'nexts'.
329f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
330f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PdtParenReachable<A>::UpdateStateSet(
331f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId nexts, set<Label> *paren_set,
332f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector< set<StateId> > *state_sets) const {
333f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for(ParenIterator paren_iter = FindParens(nexts);
334f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      !paren_iter.Done(); paren_iter.Next()) {
335f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Label paren_id = paren_iter.Value();
336f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    paren_set->insert(paren_id);
337f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (SetIterator set_iter = FindStates(paren_id, nexts);
338f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !set_iter.Done(); set_iter.Next()) {
339f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      (*state_sets)[paren_id].insert(set_iter.Element());
340f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
341f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
342f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
343f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
344f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
345f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Store balancing parenthesis data for a PDT. Allows on-the-fly
346f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// construction (e.g. in PdtShortestPath) unlike PdtParenReachable above.
347f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
348f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass PdtBalanceData {
349f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
350f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
351f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
352f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
353f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Hash set for open parens
354f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_set<ParenState<A>, typename ParenState<A>::Hash> OpenParenSet;
355f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
356f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from open paren destination state to parenthesis ID.
357f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_multimap<StateId, Label> OpenParenMap;
358f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
359f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from open paren state to source states of matching close parens
360f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_multimap<ParenState<A>, StateId,
361f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                        typename ParenState<A>::Hash> CloseParenMap;
362f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
363f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Maps from open paren state to close source set ID
364f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef unordered_map<ParenState<A>, ssize_t,
365f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   typename ParenState<A>::Hash> CloseSourceMap;
366f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
367f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Collection<ssize_t, StateId>::SetIterator SetIterator;
368f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
369f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PdtBalanceData() {}
370f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
371f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Clear() {
372f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    open_paren_map_.clear();
373f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    close_paren_map_.clear();
374f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
375f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
376f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Adds an open parenthesis with destination state 'open_dest'.
377f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void OpenInsert(Label paren_id, StateId open_dest) {
378f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> key(paren_id, open_dest);
379f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!open_paren_set_.count(key)) {
380f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      open_paren_set_.insert(key);
381f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      open_paren_map_.insert(make_pair(open_dest, paren_id));
382f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
383f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
384f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
385f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Adds a matching closing parenthesis with source state
386f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // 'close_source' that balances an open_parenthesis with destination
387f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // state 'open_dest' if OpenInsert() previously called
388f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // (o.w. CloseInsert() does nothing).
389f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void CloseInsert(Label paren_id, StateId open_dest, StateId close_source) {
390f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> key(paren_id, open_dest);
391f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (open_paren_set_.count(key))
392f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      close_paren_map_.insert(make_pair(key, close_source));
393f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
394f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
395f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Find close paren source states matching an open parenthesis.
396f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Methods that follow, iterate through those matching states.
397f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Should be called only after FinishInsert(open_dest).
398f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  SetIterator Find(Label paren_id, StateId open_dest) {
399f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ParenState<A> close_key(paren_id, open_dest);
400f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    typename CloseSourceMap::const_iterator id_it =
401f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        close_source_map_.find(close_key);
402f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (id_it == close_source_map_.end()) {
403f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return close_source_sets_.FindSet(-1);
404f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
405f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return close_source_sets_.FindSet(id_it->second);
406f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
407f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
408f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
409f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Call when all open and close parenthesis insertions wrt open
410f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // parentheses entering 'open_dest' are finished. Must be called
411f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // before Find(open_dest). Stores close paren source state sets
412f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // efficiently.
413f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void FinishInsert(StateId open_dest) {
414f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<StateId> close_sources;
415f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename OpenParenMap::iterator oit = open_paren_map_.find(open_dest);
416f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         oit != open_paren_map_.end() && oit->first == open_dest;) {
417f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label paren_id = oit->second;
418f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      close_sources.clear();
419f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ParenState<A> okey(paren_id, open_dest);
420f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      open_paren_set_.erase(open_paren_set_.find(okey));
421f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (typename CloseParenMap::iterator cit = close_paren_map_.find(okey);
422f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           cit != close_paren_map_.end() && cit->first == okey;) {
423f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        close_sources.push_back(cit->second);
424f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        close_paren_map_.erase(cit++);
425f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
426f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      sort(close_sources.begin(), close_sources.end());
427f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      typename vector<StateId>::iterator unique_end =
428f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          unique(close_sources.begin(), close_sources.end());
429f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      close_sources.resize(unique_end - close_sources.begin());
430f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
431f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!close_sources.empty())
432f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        close_source_map_[okey] = close_source_sets_.FindId(close_sources);
433f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      open_paren_map_.erase(oit++);
434f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
435f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
436f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
437f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Return a new balance data object representing the reversed balance
438f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // information.
439f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PdtBalanceData<A> *Reverse(StateId num_states,
440f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               StateId num_split,
441f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                               StateId state_id_shift) const;
442f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
443f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
444f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  OpenParenSet open_paren_set_;                      // open par. at dest?
445f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
446f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  OpenParenMap open_paren_map_;                      // open parens per state
447f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ParenState<A> open_dest_;                          // cur open dest. state
448f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typename OpenParenMap::const_iterator open_iter_;  // cur open parens/state
449f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
450f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CloseParenMap close_paren_map_;                    // close states/open
451f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                                     //  paren and state
452f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
453f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  CloseSourceMap close_source_map_;                  // paren, state to set ID
454f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  mutable Collection<ssize_t, StateId> close_source_sets_;
455f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
456f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
457f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Return a new balance data object representing the reversed balance
458f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// information.
459f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
460f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonPdtBalanceData<A> *PdtBalanceData<A>::Reverse(
461f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId num_states,
462f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId num_split,
463f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId state_id_shift) const {
464f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PdtBalanceData<A> *bd = new PdtBalanceData<A>;
465f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_set<StateId> close_sources;
466f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId split_size = num_states / num_split;
467f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
468f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateId i = 0; i < num_states; i+= split_size) {
469f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    close_sources.clear();
470f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
471f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename CloseSourceMap::const_iterator
472f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             sit = close_source_map_.begin();
473f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         sit != close_source_map_.end();
474f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         ++sit) {
475f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ParenState<A> okey = sit->first;
476f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      StateId open_dest = okey.state_id;
477f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      Label paren_id = okey.paren_id;
478f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (SetIterator set_iter = close_source_sets_.FindSet(sit->second);
479f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           !set_iter.Done(); set_iter.Next()) {
480f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        StateId close_source = set_iter.Element();
481f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if ((close_source < i) || (close_source >= i + split_size))
482f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          continue;
483f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        close_sources.insert(close_source + state_id_shift);
484f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        bd->OpenInsert(paren_id, close_source + state_id_shift);
485f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        bd->CloseInsert(paren_id, close_source + state_id_shift,
486f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                        open_dest + state_id_shift);
487f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
488f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
489f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
490f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (typename unordered_set<StateId>::const_iterator it
491f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson             = close_sources.begin();
492f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         it != close_sources.end();
493f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         ++it) {
494f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      bd->FinishInsert(*it);
495f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
496f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
497f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
498f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return bd;
499f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
500f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
501f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
502f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
503f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
504f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_PDT_PAREN_H_
505