1d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// epsnormalize.h
2d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola
3d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Licensed under the Apache License, Version 2.0 (the "License");
4d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// you may not use this file except in compliance with the License.
5d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// You may obtain a copy of the License at
6d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola//
7d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola//     http://www.apache.org/licenses/LICENSE-2.0
8d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola//
9d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Unless required by applicable law or agreed to in writing, software
10d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// distributed under the License is distributed on an "AS IS" BASIS,
11d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// See the License for the specific language governing permissions and
13d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// limitations under the License.
14d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola//
15d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Copyright 2005-2010 Google, Inc.
16d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Author: allauzen@google.com (Cyril Allauzen)
17d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola//
18d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// \file
19d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola// Function that implements epsilon normalization.
20d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola
21d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#ifndef FST_LIB_EPSNORMALIZE_H__
22d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#define FST_LIB_EPSNORMALIZE_H__
23d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola
24d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola#include <tr1/unordered_map>
25d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindolausing std::tr1::unordered_map;
26ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramerusing std::tr1::unordered_multimap;
27ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer
28ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer
29f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin#include <fst/factor-weight.h>
307dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#include <fst/invert.h>
317dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#include <fst/arc-map.h>
32f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin#include <fst/rmepsilon.h>
33f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin
34f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin
35f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskinnamespace fst {
36f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin
37be5ff2f830d1833891051472d29818aa9f783cceRafael Espindolaenum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT};
38be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola
39be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// Returns an equivalent FST that is epsilon-normalized. An acceptor is
40be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// epsilon-normalized if it is epsilon-removed. A transducer is input
41be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// epsilon-normalized if additionally if on each path any epsilon input
42be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// label follows all non-epsilon input labels. Output epsilon-normalized
43be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// is defined similarly.
44be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola//
45be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// The input FST needs to be functional.
46be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola//
47be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// References:
48be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization
49be5ff2f830d1833891051472d29818aa9f783cceRafael Espindola//   algorithms for weighted transducers", International Journal of Computer
50f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin//   Science, 13(1): 129-143, 2002.
51f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskintemplate <class Arc>
52f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskinvoid EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
53f18a87b1ce1be5ccc3b6b3dbd4f15374710e237bJeffrey Yasskin                      EpsNormalizeType type = EPS_NORM_INPUT) {
54d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola  VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst;
55d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola  if (type == EPS_NORM_INPUT)
56d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola    ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
57d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola  else // type == EPS_NORM_OUTPUT
58d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola    ArcMap(InvertFst<Arc>(ifst), &gfst,
59d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola           ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
60d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola  RmEpsilon(&gfst);
61d85c1e9800213d2a8750215f6980ae67411cac72Rafael Espindola  FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>,
629b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola    GallicFactor<typename Arc::Label,
639b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola      typename Arc::Weight, STRING_RIGHT_RESTRICT> >
649b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola    fwfst(gfst);
65ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer  ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
669b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola  ofst->SetOutputSymbols(ifst.OutputSymbols());
67ccdb2a5e0ab4b0a7fc9d3859c6bb1676182169d5Benjamin Kramer  if(type == EPS_NORM_OUTPUT)
689b7568da31b0dc97e15253391cb100e9958d3d78Rafael Espindola    Invert(ofst);
697dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne}
707dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne
717dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne}  // namespace fst
727dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne
737dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne#endif  // FST_LIB_EPSNORMALIZE_H__
747dc8e8175a3a1d4e40819932165e26b01091541ePeter Collingbourne