1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// weight.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// General weight set and associated semiring operation definitions.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A semiring is specified by two binary operations Plus and Times and
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// two designated elements Zero and One with the following properties:
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Plus: associative, commutative, and has Zero as its identity.
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Times: associative and has identity One, distributes w.r.t. Plus, and
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     has Zero as an annihilator:
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//          Times(Zero(), a) == Times(a, Zero()) = Zero().
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  A left semiring distributes on the left; a right semiring is
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//  similarly defined.
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
31dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// A Weight class must have binary functions =Plus= and =Times= and
32dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// static member functions =Zero()= and =One()= and these must form
33dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin// (at least) a left or right semiring.
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// In addition, the following should be defined for a Weight:
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Member: predicate on set membership.
37dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   NoWeight: static member function that returns an element that is
38dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//      not a set member; used to signal an error.
39dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   >>: reads textual representation of a weight.
40dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   <<: prints textual representation of a weight.
41dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   Read(istream &strm): reads binary representation of a weight.
42dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//   Write(ostream &strm): writes binary representation of a weight.
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Hash: maps weight to size_t.
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ApproxEqual: approximate equality (for inexact weights)
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Quantize: quantizes wrt delta (for inexact weights)
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Divide: for all a,b,c s.t. Times(a, b) == c
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      and Times(a, b') == c
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      and Times(a', b) == c
51dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//     --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) =
52dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//      Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a
53dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//      commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   ReverseWeight: the type of the corresponding reverse weight.
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     Typically the same type as Weight for a (both left and right) semiring.
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     For the left string semiring, it is the right string semiring.
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Reverse: a mapping from Weight to ReverseWeight s.t.
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     --> Reverse(Reverse(a)) = a
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     Typically the identity mapping in a (both left and right) semiring.
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     In the left string semiring, it maps to the reverse string
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     in the right string semiring.
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//   Properties: specifies additional properties that hold:
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      LeftSemiring: indicates weights form a left semiring.
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      RightSemiring: indicates weights form a right semiring.
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      Commutative: for all a,b: Times(a,b) == Times(b,a)
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//      Idempotent: for all a: Plus(a, a) == a.
69dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin//      Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_WEIGHT_H__
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_WEIGHT_H__
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <cmath>
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <cctype>
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <iostream>
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <sstream>
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/compat.h>
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h>
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// CONSTANT DEFINITIONS
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// A representable float near .001
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst float kDelta =                   1.0F/1024.0F;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b))
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kLeftSemiring =           0x0000000000000001ULL;
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c))
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kRightSemiring =          0x0000000000000002ULL;
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kSemiring = kLeftSemiring | kRightSemiring;
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For all a,b: Times(a,b) = Times(b,a)
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kCommutative =       0x0000000000000004ULL;
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For all a: Plus(a, a) = a
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kIdempotent =             0x0000000000000008ULL;
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// For all a,b: Plus(a,b) = a or Plus(a,b) = b
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonconst uint64 kPath =                   0x0000000000000010ULL;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Determines direction of division.
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonenum DivideType { DIVIDE_LEFT,   // left division
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  DIVIDE_RIGHT,  // right division
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                  DIVIDE_ANY };  // division in a commutative semiring
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// NATURAL ORDER
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// By definition:
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//                 a <= b iff a + b = a
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// The natural order is a negative partial order iff the semiring is
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// idempotent. It is trivially monotonic for plus. It is left
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (resp. right) monotonic for times iff the semiring is left
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// (resp. right) distributive. It is a total order iff the semiring
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// has the path property. See Mohri, "Semiring Framework and
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Algorithms for Shortest-Distance Problems", Journal of Automata,
127f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Languages and Combinatorics 7(3):321-350, 2002. We define the
128f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// strict version of this order below.
129f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
130f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class W>
131f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonclass NaturalLess {
132f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson public:
133f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef W Weight;
134f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
135f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  NaturalLess() {
136f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!(W::Properties() & kIdempotent)) {
137f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      FSTERROR() << "NaturalLess: Weight type is not idempotent: "
138f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                 << W::Type();
139f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
140f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
141f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
142f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  bool operator()(const W &w1, const W &w2) const {
143f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return (Plus(w1, w2) == w1) && w1 != w2;
144f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
145f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
146f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
147f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
148f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Power is the iterated product for arbitrary semirings such that
149f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Power(w, 0) is One() for the semiring, and
150f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Power(w, n) = Times(Power(w, n-1), w)
151f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
152f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class W>
153f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonW Power(W w, size_t n) {
154f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  W result = W::One();
155f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (size_t i = 0; i < n; ++i) {
156f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    result = Times(result, w);
157f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
158f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return result;
159f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
160f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
161f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// General weight converter - raises error.
162f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class W1, class W2>
163f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct WeightConvert {
164f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  W2 operator()(W1 w1) const {
165f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "WeightConvert: can't convert weight from \""
166f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << W1::Type() << "\" to \"" << W2::Type();
167f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return W2::NoWeight();
168f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
169f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
170f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
171f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Specialized weight converter to self.
172f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate <class W>
173f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonstruct WeightConvert<W, W> {
174f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  W operator()(W w) const { return w; }
175f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson};
176f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
177f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
178f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
179f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_WEIGHT_H__
180