1d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// epsnormalize.h 2d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola 3d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Licensed under the Apache License, Version 2.0 (the "License"); 4d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// you may not use this file except in compliance with the License. 5d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// You may obtain a copy of the License at 6d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// 7d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// http://www.apache.org/licenses/LICENSE-2.0 8d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// 9d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Unless required by applicable law or agreed to in writing, software 10d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// distributed under the License is distributed on an "AS IS" BASIS, 11d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// See the License for the specific language governing permissions and 13d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// limitations under the License. 14d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// 15d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Copyright 2005-2010 Google, Inc. 16d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Author: allauzen@google.com (Cyril Allauzen) 17d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// 18d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// \file 19d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Function that implements epsilon normalization. 20d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola 21d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#ifndef FST_LIB_EPSNORMALIZE_H__ 22d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#define FST_LIB_EPSNORMALIZE_H__ 23d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola 24d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#include <tr1/unordered_map> 25d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindolausing std::tr1::unordered_map; 26ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramerusing std::tr1::unordered_multimap; 27ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer 28ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer 29f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin#include <fst/factor-weight.h> 307dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#include <fst/invert.h> 317dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#include <fst/arc-map.h> 32f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin#include <fst/rmepsilon.h> 33f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin 34f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin 35f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskinnamespace fst { 36f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin 37be5ff2f830d1833891051472d29818aa9f783cceRafael Espindolaenum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT}; 38be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola 39be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// Returns an equivalent FST that is epsilon-normalized. An acceptor is 40be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// epsilon-normalized if it is epsilon-removed. A transducer is input 41be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// epsilon-normalized if additionally if on each path any epsilon input 42be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// label follows all non-epsilon input labels. Output epsilon-normalized 43be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// is defined similarly. 44be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// 45be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// The input FST needs to be functional. 46be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// 47be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// References: 48be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization 49be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// algorithms for weighted transducers", International Journal of Computer 50f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin// Science, 13(1): 129-143, 2002. 51f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskintemplate <class Arc> 52f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskinvoid EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, 53f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin EpsNormalizeType type = EPS_NORM_INPUT) { 54d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst; 55d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola if (type == EPS_NORM_INPUT) 56d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 57d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola else // type == EPS_NORM_OUTPUT 58d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola ArcMap(InvertFst<Arc>(ifst), &gfst, 59d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 60d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola RmEpsilon(&gfst); 61d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>, 629b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola GallicFactor<typename Arc::Label, 639b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola typename Arc::Weight, STRING_RIGHT_RESTRICT> > 649b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola fwfst(gfst); 65ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); 669b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola ofst->SetOutputSymbols(ifst.OutputSymbols()); 67ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer if(type == EPS_NORM_OUTPUT) 689b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola Invert(ofst); 697dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne} 707dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne 717dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne} // namespace fst 727dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne 737dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#endif // FST_LIB_EPSNORMALIZE_H__ 747dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne