info.h revision dfd8b8327b93660601d016cdc6f29f433b45a8d8
1// info.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Prints information about a PDT.
20
21#ifndef FST_EXTENSIONS_PDT_INFO_H__
22#define FST_EXTENSIONS_PDT_INFO_H__
23
24#include <unordered_map>
25using std::tr1::unordered_map;
26using std::tr1::unordered_multimap;
27#include <unordered_set>
28using std::tr1::unordered_set;
29using std::tr1::unordered_multiset;
30#include <vector>
31using std::vector;
32
33#include <fst/fst.h>
34#include <fst/extensions/pdt/pdt.h>
35
36namespace fst {
37
38// Compute various information about PDTs, helper class for pdtinfo.cc.
39template <class A> class PdtInfo {
40public:
41  typedef A Arc;
42  typedef typename A::StateId StateId;
43  typedef typename A::Label Label;
44  typedef typename A::Weight Weight;
45
46  PdtInfo(const Fst<A> &fst,
47          const vector<pair<typename A::Label,
48          typename A::Label> > &parens);
49
50  const string& FstType() const { return fst_type_; }
51  const string& ArcType() const { return A::Type(); }
52
53  int64 NumStates() const { return nstates_; }
54  int64 NumArcs() const { return narcs_; }
55  int64 NumOpenParens() const { return nopen_parens_; }
56  int64 NumCloseParens() const { return nclose_parens_; }
57  int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
58  int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
59  int64 NumOpenParenStates() const { return nopen_paren_states_; }
60  int64 NumCloseParenStates() const { return nclose_paren_states_; }
61
62 private:
63  string fst_type_;
64  int64 nstates_;
65  int64 narcs_;
66  int64 nopen_parens_;
67  int64 nclose_parens_;
68  int64 nuniq_open_parens_;
69  int64 nuniq_close_parens_;
70  int64 nopen_paren_states_;
71  int64 nclose_paren_states_;
72
73  DISALLOW_COPY_AND_ASSIGN(PdtInfo);
74};
75
76template <class A>
77PdtInfo<A>::PdtInfo(const Fst<A> &fst,
78                 const vector<pair<typename A::Label,
79                                   typename A::Label> > &parens)
80  : fst_type_(fst.Type()),
81    nstates_(0),
82    narcs_(0),
83    nopen_parens_(0),
84    nclose_parens_(0),
85    nuniq_open_parens_(0),
86    nuniq_close_parens_(0),
87    nopen_paren_states_(0),
88    nclose_paren_states_(0) {
89  unordered_map<Label, size_t> paren_map;
90  unordered_set<Label> paren_set;
91  unordered_set<StateId> open_paren_state_set;
92  unordered_set<StateId> close_paren_state_set;
93
94  for (size_t i = 0; i < parens.size(); ++i) {
95    const pair<Label, Label>  &p = parens[i];
96    paren_map[p.first] = i;
97    paren_map[p.second] = i;
98  }
99
100  for (StateIterator< Fst<A> > siter(fst);
101       !siter.Done();
102       siter.Next()) {
103    ++nstates_;
104    StateId s = siter.Value();
105    for (ArcIterator< Fst<A> > aiter(fst, s);
106         !aiter.Done();
107         aiter.Next()) {
108      const A &arc = aiter.Value();
109      ++narcs_;
110      typename unordered_map<Label, size_t>::const_iterator pit
111        = paren_map.find(arc.ilabel);
112      if (pit != paren_map.end()) {
113        Label open_paren =  parens[pit->second].first;
114        Label close_paren =  parens[pit->second].second;
115        if (arc.ilabel == open_paren) {
116          ++nopen_parens_;
117          if (!paren_set.count(open_paren)) {
118            ++nuniq_open_parens_;
119            paren_set.insert(open_paren);
120          }
121          if (!open_paren_state_set.count(arc.nextstate)) {
122            ++nopen_paren_states_;
123            open_paren_state_set.insert(arc.nextstate);
124          }
125        } else {
126          ++nclose_parens_;
127          if (!paren_set.count(close_paren)) {
128            ++nuniq_close_parens_;
129            paren_set.insert(close_paren);
130          }
131          if (!close_paren_state_set.count(s)) {
132            ++nclose_paren_states_;
133            close_paren_state_set.insert(s);
134          }
135
136        }
137      }
138    }
139  }
140}
141
142
143template <class A>
144void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
145  ios_base::fmtflags old = cout.setf(ios::left);
146  cout.width(50);
147  cout << "fst type" << pdtinfo.FstType().c_str() << endl;
148  cout.width(50);
149  cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
150  cout.width(50);
151  cout << "# of states" << pdtinfo.NumStates() << endl;
152  cout.width(50);
153  cout << "# of arcs" << pdtinfo.NumArcs() << endl;
154  cout.width(50);
155  cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
156  cout.width(50);
157  cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
158  cout.width(50);
159  cout << "# of unique open parentheses"
160       << pdtinfo.NumUniqueOpenParens() << endl;
161  cout.width(50);
162  cout << "# of unique close parentheses"
163       << pdtinfo.NumUniqueCloseParens() << endl;
164  cout.width(50);
165  cout << "# of open parenthesis dest. states"
166       << pdtinfo.NumOpenParenStates() << endl;
167  cout.width(50);
168  cout << "# of close parenthesis source states"
169       << pdtinfo.NumCloseParenStates() << endl;
170  cout.setf(old);
171}
172
173}  // namespace fst
174
175#endif  // FST_EXTENSIONS_PDT_INFO_H__
176