1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsnormalize.h 2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License"); 4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License. 5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at 6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// http://www.apache.org/licenses/LICENSE-2.0 8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software 10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS, 11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and 13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License. 14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc. 16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen) 17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file 19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Function that implements epsilon normalization. 20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_EPSNORMALIZE_H__ 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_EPSNORMALIZE_H__ 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map; 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap; 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/slist.h> 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/factor-weight.h> 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/invert.h> 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arc-map.h> 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/rmepsilon.h> 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT}; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns an equivalent FST that is epsilon-normalized. An acceptor is 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsilon-normalized if it is epsilon-removed. A transducer is input 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsilon-normalized if additionally if on each path any epsilon input 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label follows all non-epsilon input labels. Output epsilon-normalized 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// is defined similarly. 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The input FST needs to be functional. 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References: 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// algorithms for weighted transducers", International Journal of Computer 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Science, 13(1): 129-143, 2002. 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc> 53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson EpsNormalizeType type = EPS_NORM_INPUT) { 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst; 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (type == EPS_NORM_INPUT) 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson else // type == EPS_NORM_OUTPUT 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcMap(InvertFst<Arc>(ifst), &gfst, 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson RmEpsilon(&gfst); 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>, 63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson GallicFactor<typename Arc::Label, 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson typename Arc::Weight, STRING_RIGHT_RESTRICT> > 65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson fwfst(gfst); 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ofst->SetOutputSymbols(ifst.OutputSymbols()); 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if(type == EPS_NORM_OUTPUT) 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson Invert(ofst); 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif // FST_LIB_EPSNORMALIZE_H__ 75