
// string.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
//
// \file
// Utilities to convert strings into FSTs.
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_STRING_H_ 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_STRING_H_ 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compact-fst.h> 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_string(fst_field_separator); 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor compiling a string in an FST 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StringCompiler { 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StringCompiler(TokenType type, const SymbolTable *syms = 0, 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label unknown_label = kNoLabel, 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool allow_negative = false) 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : token_type_(type), syms_(syms), unknown_label_(unknown_label), 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
allow_negative_(allow_negative) {} 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Compile string 's' into FST 'fst'. 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class F> 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const string &s, F *fst) { 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Label> labels; 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!ConvertStringToLabels(s, &labels)) 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Compile(labels, fst); 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ConvertStringToLabels(const string &str, vector<Label> *labels) const { 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson labels->clear(); 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (token_type_ == BYTE) { 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < str.size(); ++i) 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson labels->push_back(static_cast<unsigned char>(str[i])); 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (token_type_ == UTF8) { 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return UTF8StringToLabels(str, labels); 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson char *c_str = new char[str.size() + 1]; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson str.copy(c_str, str.size()); 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson c_str[str.size()] = 0; 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<char *> vec; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string 
separator = "\n" + FLAGS_fst_field_separator; 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SplitToVector(c_str, separator.c_str(), &vec, true); 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < vec.size(); ++i) { 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label label; 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!ConvertSymbolToLabel(vec[i], &label)) 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson labels->push_back(label); 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson delete[] c_str; 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Compile(const vector<Label> &labels, MutableFst<A> *fst) const { 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->DeleteStates(); 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (fst->NumStates() <= labels.size()) 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->AddState(); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < labels.size(); ++i) 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1)); 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetStart(0); 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetFinal(labels.size(), Weight::One()); 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson template <class Unsigned> 96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson void Compile(const vector<Label> &labels, CompactFst<A, StringCompactor<A>, 
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Unsigned> *fst) const { 98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fst->SetCompactElements(labels.begin(), labels.end()); 99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool ConvertSymbolToLabel(const char *s, Label* output) const { 102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 n; 103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (syms_) { 104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson n = syms_->Find(s); 105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if ((n == -1) && (unknown_label_ != kNoLabel)) 106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson n = unknown_label_; 107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (n == -1 || (!allow_negative_ && n < 0)) { 108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s 109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\" is not mapped to any integer label, symbol table = " 110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << syms_->Name(); 111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson char *p; 115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson n = strtoll(s, &p, 10); 116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (p < s + strlen(s) || (!allow_negative_ && n < 0)) { 117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer " 118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "= \"" << s << "\""; 119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *output = n; 123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson TokenType token_type_; // Token type: symbol, byte or utf8 encoded 127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *syms_; // Symbol table used when token type is symbol 128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Label unknown_label_; // Label for token missing from symbol table 129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool allow_negative_; // Negative labels allowed? 130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(StringCompiler); 132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor to print a string FST as a string. 
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A> 136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StringPrinter { 137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public: 138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef A Arc; 139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Label Label; 140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::StateId StateId; 141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typedef typename A::Weight Weight; 142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; 144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StringPrinter(TokenType token_type, 146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *syms = 0) 147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson : token_type_(token_type), syms_(syms) {} 148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson // Convert the FST 'fst' into the string 'output' 150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool operator()(const Fst<A> &fst, string *output) { 151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool is_a_string = FstToLabels(fst); 152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!is_a_string) { 153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(1) << "StringPrinter::operator(): Fst is not a string."; 154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson output->clear(); 158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (token_type_ == SYMBOL) { 
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson stringstream sstrm; 161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < labels_.size(); ++i) { 162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (i) 163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson sstrm << *(FLAGS_fst_field_separator.rbegin()); 164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!PrintLabel(labels_[i], sstrm)) 165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson *output = sstrm.str(); 168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (token_type_ == BYTE) { 169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (size_t i = 0; i < labels_.size(); ++i) { 170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson output->push_back(labels_[i]); 171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else if (token_type_ == UTF8) { 173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return LabelsToUTF8String(labels_, output); 174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(1) << "StringPrinter::operator(): Unknown token type: " 176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << token_type_; 177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private: 183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool FstToLabels(const Fst<A> &fst) { 184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson labels_.clear(); 185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateId s = fst.Start(); 187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s == kNoStateId) { 188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for " 189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "string fst."; 190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson while (fst.Final(s) == Weight::Zero()) { 194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator<Fst<A> > aiter(fst, s); 195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (aiter.Done()) { 196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does " 197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "not reach final state."; 198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const A& arc = aiter.Value(); 202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson labels_.push_back(arc.olabel); 203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson s = arc.nextstate; 205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (s == kNoStateId) { 206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid " 207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "state."; 208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Next(); 
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!aiter.Done()) { 213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StringPrinter::FstToLabels: State with multiple " 214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "outgoing arcs found."; 215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool PrintLabel(Label lab, ostream& ostrm) { 223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (syms_) { 224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson string symbol = syms_->Find(lab); 225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (symbol == "") { 226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not " 227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "mapped to any textual symbol, symbol table = " 228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << syms_->Name(); 229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostrm << symbol; 232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } else { 233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostrm << lab; 234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson TokenType token_type_; // Token type: symbol, byte or utf8 encoded 
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const SymbolTable *syms_; // Symbol table used when token type is symbol 240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson vector<Label> labels_; // Input FST labels. 241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson DISALLOW_COPY_AND_ASSIGN(StringPrinter); 243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}; 244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_STRING_H_ 248