weight.h revision 73018b4a1d088cdda0e7bd059fddf1f308a8195a
1f2038fb01417bcf7698b87a5dfaa4a861539618aerik.corry@gmail.com// weight.h
23484964a86451e86dcf04be9bd8c0d76ee04f081rossberg@chromium.org//
33484964a86451e86dcf04be9bd8c0d76ee04f081rossberg@chromium.org// Licensed under the Apache License, Version 2.0 (the "License");
4a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// you may not use this file except in compliance with the License.
5a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// You may obtain a copy of the License at
6a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//
7a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//      http://www.apache.org/licenses/LICENSE-2.0
8a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//
97979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.org// Unless required by applicable law or agreed to in writing, software
101c09276ce2ac5214e81ca554360b9f101187893blrn@chromium.org// distributed under the License is distributed on an "AS IS" BASIS,
11a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12a70700b91bc28abeed6373b856017f7f9cc8273bmachenbach@chromium.org// See the License for the specific language governing permissions and
13fb144a0716afe7ab8bf245f2391a9e53b3db3c89fschneider@chromium.org// limitations under the License.
14c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org//
15eac65cd57a2d5f018fc440eed1b69d0fe80fe336machenbach@chromium.org//
167979bbb1df2eaff193e85d44c8da1ffa1525b7fcfschneider@chromium.org// \file
17a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// General weight set and associated semiring operation definitions.
18528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org//
19865f51ff8c94f86f4c97636d70addc0f29e79674machenbach@chromium.org// A semiring is specified by two binary operations Plus and Times and
20a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// two designated elements Zero and One with the following properties:
21a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Plus: associative, commutative, and has Zero as its identity.
22a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Times: associative and has identity One, distributes w.r.t. Plus, and
23a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     has Zero as an annihilator:
24a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//          Times(Zero(), a) == Times(a, Zero()) = Zero().
25a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//
26a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//  A left semiring distributes on the left; a right semiring is
27af4fba3c6d2a18866505de3e6798757dd1448c6dmachenbach@chromium.org//  similarly defined.
28a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//
291510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org// A Weight class is required to be (at least) a left or right semiring.
30a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//
31a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// In addition, the following should be defined for a Weight:
32cc8e177451e2ab80cf4eacfd782d19cd05ec2070hpayer@chromium.org//   Member: predicate on set membership.
33a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   >>: reads textual representation of a weight.
34a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   <<: prints textual representation of a weight.
35a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Read(istream &): reads binary representation of a weight.
36a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Write(ostrem &): writes binary representation of a weight.
37dcebac0f4c6c0da579b7cc91a0cbba8f3c820c8dricow@chromium.org//   Hash: maps weight to ssize_t.
38837a67edd9afdbfe1b59482b41693f59c48846ffulan@chromium.org//   ApproxEqual: approximate equality (for inexact weights)
3933e09c8efd078308de3c77a88301566f65c07befverwaest@chromium.org//   Quantize: quantizes wrt delta (for inexact weights)
40a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Divide: for all a,b,c s.t. Times(a, b) == c
41a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> b = Divide(c, a, DIVIDE_LEFT) if a left semiring and b.Member()
42a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> a = Divide(c, b, DIVIDE_RIGHT) if a right semiring and a.Member()
43a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> b = Divide(c, a)
44a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//           = Divide(c, a, DIVIDE_ANY)
45a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//           = Divide(c, a, DIVIDE_LEFT)
46c3669763e2617aefdac84a072327b201b3dff129jkummerow@chromium.org//           = Divide(c, a, DIVIDE_RIGHT) if a commutative semiring
47a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   ReverseWeight: the type of the corresponding reverse weight.
48a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     Typically the same type as Weight for a (both left and right) semiring.
4994b0d6fcb08a2f01ba52c6edb712068f482366f1danno@chromium.org//     For the left string semiring, it is the right string semiring.
50a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Reverse: a mapping from Weight to ReverseWeight s.t.
51a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> Reverse(Reverse(a)) = a
52a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b))
53a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a))
543c3c8d733702cb2b41471efa5eead1faf5b5711bmachenbach@chromium.org//     Typically the identity mapping in a (both left and right) semiring.
55c3b37129d6387b2db313f9100256d2d5f60dd9a8jkummerow@chromium.org//     In the left string semiring, it maps to the reverse string
56a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//     in the right string semiring.
57a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//   Properties: specifies additional properties that hold:
58876cca833d7212e476250d102cad185cdcfa9dfesvenpanne@chromium.org//      LeftSemiring: indicates weights form a left semiring.
594f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org//      RightSemiring: indicates weights form a right semiring.
6026ca35cc4ec47151d9c6d3890b0f052fc79cb8afmachenbach@chromium.org//      TimesCommutative: for all a,b: Times(a,b) == Times(b,a)
6126ca35cc4ec47151d9c6d3890b0f052fc79cb8afmachenbach@chromium.org//      Idempotent: for all a: Plus(a, a) == a.
62a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//      Path Property: for all a, b: Plus(a, b) == a or Plus(a, b) == b.
63a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
644a9f6553038df6b893b3d3ccae351723f4cbbae7yangguo@chromium.org
65a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#ifndef FST_LIB_WEIGHT_H__
66a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#define FST_LIB_WEIGHT_H__
67594006017e46d82ed7146611dc12c20e3c509c7ddanno@chromium.org
68a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include <cctype>
691510d58cbcf57c82a10e7d390bfe21a7ae68ba43mstarzinger@chromium.org#include <cmath>
70a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#include <iostream>
711456e708d277e725ca42a03463af16fe471c9210jkummerow@chromium.org#include <sstream>
72c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org
73c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org#include "fst/lib/compat.h"
741f410f9a9c4fbd4270749af64b477df87b753158mstarzinger@chromium.org
75c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.org#include "fst/lib/util.h"
764f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org
77e0e1b0d3e70c933d36ed381d511e9fda39f2a751mstarzinger@chromium.orgnamespace fst {
78c00ec2b94bc5505fa81f81daefd956f5a8776a09danno@chromium.org
794f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org//
800cc095007a3ccded63f6577751c6a04300eb7ae9machenbach@chromium.org// CONSTANT DEFINITIONS
814f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org//
820a4e901cdfb5505a896d30aa8c2e04fce0fbe069vegorov@chromium.org
83a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// A representable float near .001
84ca2f2040e0e1a10df95bec18e69499f85f4c1316machenbach@chromium.orgconst float kDelta =                   1.0F/1024.0F;
8583aa54905e559090bea7771b83f188762cfcf082ricow@chromium.org
86c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b))
874e308cf00936c6e7bead43e5141a04e37b49b9b5jkummerow@chromium.orgconst uint64 kLeftSemiring =           0x0000000000000001ULL;
8856454717593e7552d6846198b8e0f661fa36a3cayangguo@chromium.org
89a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c))
90a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgconst uint64 kRightSemiring =          0x0000000000000002ULL;
91ca2f2040e0e1a10df95bec18e69499f85f4c1316machenbach@chromium.org
9246a2a51ad190697e0f62c3060ce02a9de5820a07yangguo@chromium.orgconst uint64 kSemiring = kLeftSemiring | kRightSemiring;
93a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
94d4be0f0c0edfc0a0b46e745055c3dc497c0ffcb5verwaest@chromium.org// For all a,b: Times(a,b) = Times(b,a)
95c53e10d01c5495df3896b9d318910b58688c6929kmillikin@chromium.orgconst uint64 kCommutative =       0x0000000000000004ULL;
96c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org
97c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org// For all a: Plus(a, a) = a
98a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgconst uint64 kIdempotent =             0x0000000000000008ULL;
9949edbdf52640c88918f8e6638ab4965819eb1dfekmillikin@chromium.org
100a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// For all a,b: Plus(a,b) = a or Plus(a,b) = b
1014f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.orgconst uint64 kPath =                   0x0000000000000010ULL;
1024f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org
1032bda543d75374afd8d7e98f56ca99a57ae1b7bd1svenpanne@chromium.org
104a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org// Determines direction of division.
105d2c22f0121ebc55ee26a9e742f0fd7c0b8397730kmillikin@chromium.orgenum DivideType { DIVIDE_LEFT,   // left division
106160a7b0747492f3f735353d9582521f3314bf4dfdanno@chromium.org                  DIVIDE_RIGHT,  // right division
1074f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org                  DIVIDE_ANY };  // division in a commutative semiring
1084f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org
1090ad885c06ff6a0d68bc9ad75629f7ddfaa6860b9erikcorry// NATURAL ORDER
1104f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org//
1114f693d6b99ffdbc05e5e211e08ed5039e13279d2ricow@chromium.org// By definition:
112a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org//                 a <= b iff a + b = a
113c6c5718277d4047fad1e034396228ce15571b5a4sgjesse@chromium.org// The natural order is a monotonic and negative partial order iff the
114c73d55b355913690124f3ee70c344035431cdd3ayangguo@chromium.org// semiring is idempotent and (left and right) distributive. It is a
115378b34e3f8852e94739bb77a528278fe0e2bb532ager@chromium.org// total order iff the semiring has the path property. See Mohri,
116c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.org// "Semiring Framework and Algorithms for Shortest-Distance Problems",
117c36ce6e8979bbbd43539f0a0effc87ea20dd65cckmillikin@chromium.org// Journal of Automata, Languages and Combinatorics 7(3):321-350,
118e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org// 2002. We define the strict version of this order below.
119a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
120a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgtemplate <class W>
121a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.orgclass NaturalLess {
122528ce02b8680a3ab6d75c7079f180a4016c69b7amachenbach@chromium.org public:
123355cfd19c23ac613f2738a40e356ea48297f7d5eyangguo@chromium.org  typedef W Weight;
124efdb9d70bddd496ceb6a281dadcc065efbce37a1yangguo@chromium.org
125471f2f1d24adb4bad1edc3bf0ee35092486de187mstarzinger@chromium.org  NaturalLess() {
126a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    uint64 props = kIdempotent | kLeftSemiring | kRightSemiring;
127a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    if ((W::Properties() & props) != props)
128a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org      LOG(ERROR) << "NaturalLess: Weight type is not idempotent and "
129a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org                 << "(left and right) distributive: " << W::Type();
1305f0c45f2cacb31d36a8f80c31f17bda7751a3644ager@chromium.org  }
131011a81ffd5df0e081e7c00ef430b2fec5079bf2amachenbach@chromium.org
132a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org  bool operator()(const W &w1, const W &w2) const {
133a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org    return (Plus(w1, w2) == w1) && w1 != w2;
134e4ee6de0de64744d55b63da83156827c989c7099verwaest@chromium.org  }
135a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org};
136e8412be858dc48afaec4959e42c5932f71a7f29bmachenbach@chromium.org
13732280cf2786219b2d9a668f7f00778fb59ac40b3mstarzinger@chromium.org}  // namespace fst;
138a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org
139a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org#endif  // FST_LIB_WEIGHT_H__
140a55512615f5adc085d23bc8589d155c4b579fb7bkasperl@chromium.org