1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// info.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Prints information about a PDT.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_INFO_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_INFO_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <tr1/unordered_set>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set;
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset;
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector;
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/pdt.h>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compute various information about PDTs, helper class for pdtinfo.cc.
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class PdtInfo {
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonpublic:
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  PdtInfo(const Fst<A> &fst,
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          const vector<pair<typename A::Label,
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          typename A::Label> > &parens);
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string& FstType() const { return fst_type_; }
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const string& ArcType() const { return A::Type(); }
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumStates() const { return nstates_; }
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumArcs() const { return narcs_; }
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumOpenParens() const { return nopen_parens_; }
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumCloseParens() const { return nclose_parens_; }
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumOpenParenStates() const { return nopen_paren_states_; }
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 NumCloseParenStates() const { return nclose_paren_states_; }
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  string fst_type_;
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nstates_;
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 narcs_;
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nopen_parens_;
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nclose_parens_;
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nuniq_open_parens_;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nuniq_close_parens_;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nopen_paren_states_;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 nclose_paren_states_;
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(PdtInfo);
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonPdtInfo<A>::PdtInfo(const Fst<A> &fst,
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 const vector<pair<typename A::Label,
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                   typename A::Label> > &parens)
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  : fst_type_(fst.Type()),
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nstates_(0),
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    narcs_(0),
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nopen_parens_(0),
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nclose_parens_(0),
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nuniq_open_parens_(0),
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nuniq_close_parens_(0),
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nopen_paren_states_(0),
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    nclose_paren_states_(0) {
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_map<Label, size_t> paren_map;
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_set<Label> paren_set;
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_set<StateId> open_paren_state_set;
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  unordered_set<StateId> close_paren_state_set;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (size_t i = 0; i < parens.size(); ++i) {
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    const pair<Label, Label>  &p = parens[i];
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    paren_map[p.first] = i;
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    paren_map[p.second] = i;
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateIterator< Fst<A> > siter(fst);
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !siter.Done();
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       siter.Next()) {
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++nstates_;
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = siter.Value();
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<A> > aiter(fst, s);
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const A &arc = aiter.Value();
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++narcs_;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      typename unordered_map<Label, size_t>::const_iterator pit
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        = paren_map.find(arc.ilabel);
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (pit != paren_map.end()) {
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label open_paren =  parens[pit->second].first;
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label close_paren =  parens[pit->second].second;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (arc.ilabel == open_paren) {
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++nopen_parens_;
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (!paren_set.count(open_paren)) {
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ++nuniq_open_parens_;
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            paren_set.insert(open_paren);
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (!open_paren_state_set.count(arc.nextstate)) {
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ++nopen_paren_states_;
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            open_paren_state_set.insert(arc.nextstate);
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        } else {
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          ++nclose_parens_;
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (!paren_set.count(close_paren)) {
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ++nuniq_close_parens_;
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            paren_set.insert(close_paren);
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          if (!close_paren_state_set.count(s)) {
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            ++nclose_paren_states_;
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            close_paren_state_set.insert(s);
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          }
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        }
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ios_base::fmtflags old = cout.setf(ios::left);
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "fst type" << pdtinfo.FstType().c_str() << endl;
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of states" << pdtinfo.NumStates() << endl;
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of arcs" << pdtinfo.NumArcs() << endl;
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of unique open parentheses"
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       << pdtinfo.NumUniqueOpenParens() << endl;
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of unique close parentheses"
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       << pdtinfo.NumUniqueCloseParens() << endl;
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of open parenthesis dest. states"
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       << pdtinfo.NumOpenParenStates() << endl;
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.width(50);
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout << "# of close parenthesis source states"
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       << pdtinfo.NumCloseParenStates() << endl;
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  cout.setf(old);
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_EXTENSIONS_PDT_INFO_H__
176