symbol-table-ops.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6//     http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
14// Copyright 2005-2010 Google, Inc.
15// Author: sorenj@google.com (Jeffrey Sorensen)
16
17#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_
18#define FST_LIB_SYMBOL_TABLE_OPS_H_
19
20#include <vector>
21using std::vector;
22#include <string>
23#include <unordered_set>
24using std::tr1::unordered_set;
25using std::tr1::unordered_multiset;
26
27
28#include <fst/fst.h>
29#include <fst/symbol-table.h>
30
31
32namespace fst {
33
34// Returns a minimal symbol table containing only symbols referenced by the
35// passed fst.  Symbols preserve their original numbering, so fst does not
36// require relabeling.
37template<class Arc>
38SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms,
39                              bool input) {
40  unordered_set<typename Arc::Label> seen;
41  seen.insert(0);  // Always keep epslion
42  StateIterator<Fst<Arc> > siter(fst);
43  for (; !siter.Done(); siter.Next()) {
44    ArcIterator<Fst<Arc> > aiter(fst, siter.Value());
45    for (; !aiter.Done(); aiter.Next()) {
46      typename Arc::Label sym = (input) ? aiter.Value().ilabel :
47                                          aiter.Value().olabel;
48      seen.insert(sym);
49    }
50  }
51  SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned");
52  for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) {
53    typename Arc::Label label = stiter.Value();
54    if (seen.find(label) != seen.end()) {
55      pruned->AddSymbol(stiter.Symbol(), stiter.Value());
56    }
57  }
58  return pruned;
59}
60
// Relabels a symbol table to make it a contiguous mapping.  syms is taken by
// const reference and the relabeled table is returned as a separate
// SymbolTable pointer.  Only the declaration appears in this header.
// NOTE(review): the caller presumably owns the returned pointer -- confirm
// against the definition.
SymbolTable *CompactSymbolTable(const SymbolTable &syms);
63
// Merges two SymbolTables.  All symbols from left are merged into right with
// the same ids.  Symbols in right whose ids conflict with those in left take
// the value assigned by the left SymbolTable.  The returned symbol table
// never modifies symbol assignments from the left side, but may do so for
// the right.  If right_relabel_output is non-NULL, it is set to true when
// symbols from the right table needed to be reassigned; the argument may be
// omitted, in which case it defaults to 0 (NULL).
// A potential use case is to Compose two Fsts that have different symbol
// tables.  You can reconcile them in the following way:
//   Fst<Arc> a, b;
//   bool relabel;
//   SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(),
//                                        b.InputSymbols(), &relabel);
//   if (relabel) {
//     Relabel(b, bnew, NULL);
//   }
//   b.SetInputSymbols(bnew);
//   delete bnew;
// NOTE(review): this function takes its tables by const reference; if
// OutputSymbols()/InputSymbols() return pointers, the arguments in the
// example above would need to be dereferenced -- confirm.
SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right,
                              bool *right_relabel_output = 0);
84
// Reads the symbol table from any Fst::Read()able file, without loading the
// corresponding Fst.  Returns NULL if the Fst does not contain a symbol table
// or the symbol table cannot be read.
// NOTE(review): input presumably selects the input symbol table when true
// and the output symbol table when false, mirroring PruneSymbolTable --
// confirm against the definition.
SymbolTable *FstReadSymbols(const string &filename, bool input);
89
90}  // namespace fst
91#endif  // FST_LIB_SYMBOL_TABLE_OPS_H_
92