1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// info.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Prints information about a PDT. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_EXTENSIONS_PDT_INFO_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_EXTENSIONS_PDT_INFO_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <tr1/unordered_set> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set; 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset; 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/extensions/pdt/pdt.h> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Compute various information about PDTs, helper class for pdtinfo.cc. 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> class PdtInfo { 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonpublic: 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson PdtInfo(const Fst<A> &fst, 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename A::Label, 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename A::Label> > &parens); 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string& FstType() const { return fst_type_; } 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const string& ArcType() const { return A::Type(); } 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumStates() const { return nstates_; } 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumArcs() const { return narcs_; } 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumOpenParens() const { return nopen_parens_; } 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumCloseParens() const { return nclose_parens_; } 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumUniqueOpenParens() const { return nuniq_open_parens_; } 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumUniqueCloseParens() const { return nuniq_close_parens_; } 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumOpenParenStates() const { return nopen_paren_states_; } 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 NumCloseParenStates() const { return nclose_paren_states_; } 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string fst_type_; 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nstates_; 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 narcs_; 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nopen_parens_; 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nclose_parens_; 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nuniq_open_parens_; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nuniq_close_parens_; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nopen_paren_states_; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 nclose_paren_states_; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(PdtInfo); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonPdtInfo<A>::PdtInfo(const Fst<A> &fst, 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const vector<pair<typename A::Label, 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename A::Label> > &parens) 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : fst_type_(fst.Type()), 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nstates_(0), 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson narcs_(0), 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nopen_parens_(0), 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nclose_parens_(0), 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nuniq_open_parens_(0), 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nuniq_close_parens_(0), 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nopen_paren_states_(0), 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nclose_paren_states_(0) { 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_map<Label, size_t> paren_map; 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_set<Label> paren_set; 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_set<StateId> open_paren_state_set; 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_set<StateId> close_paren_state_set; 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < parens.size(); ++i) { 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const pair<Label, Label> &p = parens[i]; 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_map[p.first] = i; 97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_map[p.second] = i; 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (StateIterator< Fst<A> > siter(fst); 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !siter.Done(); 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson siter.Next()) { 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nstates_; 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = siter.Value(); 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (ArcIterator< Fst<A> > aiter(fst, s); 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson !aiter.Done(); 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next()) { 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A &arc = aiter.Value(); 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++narcs_; 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename unordered_map<Label, size_t>::const_iterator pit 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson = paren_map.find(arc.ilabel); 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (pit != paren_map.end()) { 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label open_paren = parens[pit->second].first; 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label close_paren = parens[pit->second].second; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (arc.ilabel == open_paren) { 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nopen_parens_; 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!paren_set.count(open_paren)) { 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nuniq_open_parens_; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_set.insert(open_paren); 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!open_paren_state_set.count(arc.nextstate)) { 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nopen_paren_states_; 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson open_paren_state_set.insert(arc.nextstate); 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nclose_parens_; 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!paren_set.count(close_paren)) { 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nuniq_close_parens_; 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson paren_set.insert(close_paren); 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!close_paren_state_set.count(s)) { 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ++nclose_paren_states_; 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson close_paren_state_set.insert(s); 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid PrintPdtInfo(const PdtInfo<A> &pdtinfo) { 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ios_base::fmtflags old = cout.setf(ios::left); 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "fst type" << pdtinfo.FstType().c_str() << endl; 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "arc type" << pdtinfo.ArcType().c_str() << endl; 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of states" << pdtinfo.NumStates() << endl; 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of arcs" << pdtinfo.NumArcs() << endl; 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl; 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl; 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of unique open parentheses" 160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << pdtinfo.NumUniqueOpenParens() << endl; 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of unique close parentheses" 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << pdtinfo.NumUniqueCloseParens() << endl; 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of open parenthesis dest. states" 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << pdtinfo.NumOpenParenStates() << endl; 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.width(50); 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout << "# of close parenthesis source states" 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << pdtinfo.NumCloseParenStates() << endl; 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson cout.setf(old); 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_EXTENSIONS_PDT_INFO_H__ 176