1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsnormalize.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: allauzen@google.com (Cyril Allauzen)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Function that implements epsilon normalization.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_EPSNORMALIZE_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_EPSNORMALIZE_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <unordered_map>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_map;
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonusing std::tr1::unordered_multimap;
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/slist.h>
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/factor-weight.h>
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/invert.h>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/arc-map.h>
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/rmepsilon.h>
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT};
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Returns an equivalent FST that is epsilon-normalized. An acceptor is
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsilon-normalized if it is epsilon-removed. A transducer is input
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// epsilon-normalized if additionally if on each path any epsilon input
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// label follows all non-epsilon input labels. Output epsilon-normalized
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// is defined similarly.
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The input FST needs to be functional.
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// References:
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// - Mehryar Mohri. "Generic epsilon-removal and input epsilon-normalization
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   algorithms for weighted transducers", International Journal of Computer
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Science, 13(1): 129-143, 2002.
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class Arc>
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst,
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                      EpsNormalizeType type = EPS_NORM_INPUT) {
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst;
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (type == EPS_NORM_INPUT)
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  else // type == EPS_NORM_OUTPUT
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ArcMap(InvertFst<Arc>(ifst), &gfst,
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson           ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  RmEpsilon(&gfst);
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>,
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    GallicFactor<typename Arc::Label,
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      typename Arc::Weight, STRING_RIGHT_RESTRICT> >
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    fwfst(gfst);
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>());
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  ofst->SetOutputSymbols(ifst.OutputSymbols());
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if(type == EPS_NORM_OUTPUT)
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    Invert(ofst);
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_EPSNORMALIZE_H__
75