1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// string.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: allauzen@google.com (Cyril Allauzen)
//
// \file
// Utilities to convert strings into FSTs.
//
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_STRING_H_
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_STRING_H_
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compact-fst.h>
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/mutable-fst.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDECLARE_string(fst_field_separator);
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor compiling a string in an FST
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StringCompiler {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StringCompiler(TokenType type, const SymbolTable *syms = 0,
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 Label unknown_label = kNoLabel,
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 bool allow_negative = false)
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : token_type_(type), syms_(syms), unknown_label_(unknown_label),
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        allow_negative_(allow_negative) {}
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Compile string 's' into FST 'fst'.
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class F>
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator()(const string &s, F *fst) {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    vector<Label> labels;
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!ConvertStringToLabels(s, &labels))
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Compile(labels, fst);
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ConvertStringToLabels(const string &str, vector<Label> *labels) const {
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    labels->clear();
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (token_type_ == BYTE) {
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < str.size(); ++i)
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        labels->push_back(static_cast<unsigned char>(str[i]));
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (token_type_ == UTF8) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return UTF8StringToLabels(str, labels);
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      char *c_str = new char[str.size() + 1];
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      str.copy(c_str, str.size());
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      c_str[str.size()] = 0;
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      vector<char *> vec;
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string separator = "\n" + FLAGS_fst_field_separator;
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      SplitToVector(c_str, separator.c_str(), &vec, true);
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < vec.size(); ++i) {
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        Label label;
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!ConvertSymbolToLabel(vec[i], &label))
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return false;
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        labels->push_back(label);
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      delete[] c_str;
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Compile(const vector<Label> &labels, MutableFst<A> *fst) const {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->DeleteStates();
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (fst->NumStates() <= labels.size())
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst->AddState();
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (size_t i = 0; i < labels.size(); ++i)
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1));
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetStart(0);
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetFinal(labels.size(), Weight::One());
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  template <class Unsigned>
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  void Compile(const vector<Label> &labels, CompactFst<A, StringCompactor<A>,
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               Unsigned> *fst) const {
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fst->SetCompactElements(labels.begin(), labels.end());
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool ConvertSymbolToLabel(const char *s, Label* output) const {
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    int64 n;
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (syms_) {
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      n = syms_->Find(s);
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if ((n == -1) && (unknown_label_ != kNoLabel))
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        n = unknown_label_;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (n == -1 || (!allow_negative_ && n < 0)) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "\" is not mapped to any integer label, symbol table = "
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << syms_->Name();
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      char *p;
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      n = strtoll(s, &p, 10);
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (p < s + strlen(s) || (!allow_negative_ && n < 0)) {
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer "
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "= \"" << s << "\"";
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    *output = n;
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *syms_;  // Symbol table used when token type is symbol
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  Label unknown_label_;      // Label for token missing from symbol table
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool allow_negative_;      // Negative labels allowed?
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(StringCompiler);
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Functor to print a string FST as a string.
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class A>
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass StringPrinter {
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef A Arc;
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Label Label;
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::StateId StateId;
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename A::Weight Weight;
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 };
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StringPrinter(TokenType token_type,
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                const SymbolTable *syms = 0)
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      : token_type_(token_type), syms_(syms) {}
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Convert the FST 'fst' into the string 'output'
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator()(const Fst<A> &fst, string *output) {
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    bool is_a_string = FstToLabels(fst);
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!is_a_string) {
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(1) << "StringPrinter::operator(): Fst is not a string.";
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    output->clear();
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (token_type_ == SYMBOL) {
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      stringstream sstrm;
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < labels_.size(); ++i) {
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (i)
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          sstrm << *(FLAGS_fst_field_separator.rbegin());
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        if (!PrintLabel(labels_[i], sstrm))
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson          return false;
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      *output = sstrm.str();
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (token_type_ == BYTE) {
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      for (size_t i = 0; i < labels_.size(); ++i) {
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        output->push_back(labels_[i]);
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else if (token_type_ == UTF8) {
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return LabelsToUTF8String(labels_, output);
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(1) << "StringPrinter::operator(): Unknown token type: "
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << token_type_;
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
180f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
181f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
182f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson private:
183f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool FstToLabels(const Fst<A> &fst) {
184f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    labels_.clear();
185f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
186f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = fst.Start();
187f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (s == kNoStateId) {
188f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for "
189f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson              << "string fst.";
190f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
191f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
192f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
193f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    while (fst.Final(s) == Weight::Zero()) {
194f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ArcIterator<Fst<A> > aiter(fst, s);
195f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (aiter.Done()) {
196f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does "
197f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "not reach final state.";
198f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
199f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
200f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
201f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const A& arc = aiter.Value();
202f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      labels_.push_back(arc.olabel);
203f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
204f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      s = arc.nextstate;
205f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (s == kNoStateId) {
206f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid "
207f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "state.";
208f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
209f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
210f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
211f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      aiter.Next();
212f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!aiter.Done()) {
213f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(2) << "StringPrinter::FstToLabels: State with multiple "
214f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "outgoing arcs found.";
215f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
216f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
217f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
218f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
219f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
220f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
221f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
222f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool PrintLabel(Label lab, ostream& ostrm) {
223f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (syms_) {
224f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      string symbol = syms_->Find(lab);
225f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (symbol == "") {
226f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not "
227f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                << "mapped to any textual symbol, symbol table = "
228f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << syms_->Name();
229f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
230f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
231f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ostrm << symbol;
232f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    } else {
233f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ostrm << lab;
234f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
235f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
236f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
237f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
238f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  TokenType token_type_;     // Token type: symbol, byte or utf8 encoded
239f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *syms_;  // Symbol table used when token type is symbol
240f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  vector<Label> labels_;     // Input FST labels.
241f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
242f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  DISALLOW_COPY_AND_ASSIGN(StringPrinter);
243f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
244f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
245f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
246f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
247f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_STRING_H_
248