1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: sorenj@google.com (Jeffrey Sorensen) 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_ 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_SYMBOL_TABLE_OPS_H_ 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <vector> 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::vector; 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 233da1eb108d36da35333b2d655202791af854996bPrzemyslaw Szczepaniak#include <tr1/unordered_set> 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_set; 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multiset; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h> 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/symbol-table.h> 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns a minimal symbol table containing only symbols referenced by the 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// passed fst. Symbols preserve their original numbering, so fst does not 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// require relabeling. 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc> 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonSymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms, 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool input) { 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson unordered_set<typename Arc::Label> seen; 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson seen.insert(0); // Always keep epslion 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson StateIterator<Fst<Arc> > siter(fst); 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !siter.Done(); siter.Next()) { 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (; !aiter.Done(); aiter.Next()) { 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label sym = (input) ? aiter.Value().ilabel : 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson aiter.Value().olabel; 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson seen.insert(sym); 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned"); 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) { 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Label label = stiter.Value(); 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (seen.find(label) != seen.end()) { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson pruned->AddSymbol(stiter.Symbol(), stiter.Value()); 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return pruned; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Relabels a symbol table to make it a contiguous mapping. 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonSymbolTable *CompactSymbolTable(const SymbolTable &syms); 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Merges two SymbolTables, all symbols from left will be merged into right 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// with the same ids. Symbols in right that have conflicting ids with those 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// in left will be assigned to value assigned from the left SymbolTable. 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The returned symbol table will never modify symbol assignments from the left 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// side, but may do so on the right. If right_relabel_output is non-NULL, it 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// will be assigned true if the symbols from the right table needed to be 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// reassigned. 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A potential use case is to Compose two Fst's that have different symbol 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// tables. You can reconcile them in the following way: 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Fst<Arc> a, b; 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// bool relabel; 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(), 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// b.InputSymbols(), &relabel); 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// if (relabel) { 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Relabel(b, bnew, NULL); 79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// } 80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// b.SetInputSymbols(bnew); 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// delete bnew; 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonSymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson bool *right_relabel_output = 0); 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Read the symbol table from any Fst::Read()able file, without loading the 86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// corresponding Fst. Returns NULL if the Fst does not contain a symbol table 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// or the symbol table cannot be read. 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonSymbolTable *FstReadSymbols(const string &filename, bool input); 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_SYMBOL_TABLE_OPS_H_ 92