1 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13// 14// Copyright 2005-2010 Google, Inc. 15// Author: sorenj@google.com (Jeffrey Sorensen) 16 17#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_ 18#define FST_LIB_SYMBOL_TABLE_OPS_H_ 19 20#include <vector> 21using std::vector; 22#include <string> 23#include <unordered_set> 24using std::tr1::unordered_set; 25using std::tr1::unordered_multiset; 26 27 28#include <fst/fst.h> 29#include <fst/symbol-table.h> 30 31 32namespace fst { 33 34// Returns a minimal symbol table containing only symbols referenced by the 35// passed fst. Symbols preserve their original numbering, so fst does not 36// require relabeling. 37template<class Arc> 38SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms, 39 bool input) { 40 unordered_set<typename Arc::Label> seen; 41 seen.insert(0); // Always keep epslion 42 StateIterator<Fst<Arc> > siter(fst); 43 for (; !siter.Done(); siter.Next()) { 44 ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); 45 for (; !aiter.Done(); aiter.Next()) { 46 typename Arc::Label sym = (input) ? aiter.Value().ilabel : 47 aiter.Value().olabel; 48 seen.insert(sym); 49 } 50 } 51 SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned"); 52 for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) { 53 typename Arc::Label label = stiter.Value(); 54 if (seen.find(label) != seen.end()) { 55 pruned->AddSymbol(stiter.Symbol(), stiter.Value()); 56 } 57 } 58 return pruned; 59} 60 61// Relabels a symbol table to make it a contiguous mapping. 62SymbolTable *CompactSymbolTable(const SymbolTable &syms); 63 64// Merges two SymbolTables, all symbols from left will be merged into right 65// with the same ids. Symbols in right that have conflicting ids with those 66// in left will be assigned to value assigned from the left SymbolTable. 67// The returned symbol table will never modify symbol assignments from the left 68// side, but may do so on the right. If right_relabel_output is non-NULL, it 69// will be assigned true if the symbols from the right table needed to be 70// reassigned. 71// A potential use case is to Compose two Fst's that have different symbol 72// tables. You can reconcile them in the following way: 73// Fst<Arc> a, b; 74// bool relabel; 75// SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(), 76// b.InputSymbols(), &relabel); 77// if (relabel) { 78// Relabel(b, bnew, NULL); 79// } 80// b.SetInputSymbols(bnew); 81// delete bnew; 82SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, 83 bool *right_relabel_output = 0); 84 85// Read the symbol table from any Fst::Read()able file, without loading the 86// corresponding Fst. Returns NULL if the Fst does not contain a symbol table 87// or the symbol table cannot be read. 88SymbolTable *FstReadSymbols(const string &filename, bool input); 89 90} // namespace fst 91#endif // FST_LIB_SYMBOL_TABLE_OPS_H_ 92