properties.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// properties.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: Michael Riley <riley@google.com>
17// \file
18// FST property bits.
19
20#ifndef FST_LIB_PROPERTIES_H__
21#define FST_LIB_PROPERTIES_H__
22
23#include <sys/types.h>
24#include <vector>
25using std::vector;
26
27#include <fst/compat.h>
28
29namespace fst {
30
31// The property bits here assert facts about an FST. If individual
32// bits are added, then the composite properties below, the property
33// functions and property names in properties.cc, and
34// TestProperties() in test-properties.h should be updated.
35
36//
37// BINARY PROPERTIES
38//
39// For each property below, there is a single bit. If it is set,
40// the property is true. If it is not set, the property is false.
41//
42
43// The Fst is an ExpandedFst
44const uint64 kExpanded =          0x0000000000000001ULL;
45
46// The Fst is a MutableFst
47const uint64 kMutable =           0x0000000000000002ULL;
48
49// An error was detected while constructing/using the FST
50const uint64 kError =             0x0000000000000004ULL;
51
52//
53// TRINARY PROPERTIES
54//
55// For each of these properties below there is a pair of property bits
56// - one positive and one negative. If the positive bit is set, the
57// property is true. If the negative bit is set, the property is
58// false. If neither is set, the property has unknown value. Both
59// should never be simultaneously set. The individual positive and
60// negative bit pairs should be adjacent with the positive bit
61// at an odd and lower position.
62
63// ilabel == olabel for each arc
64const uint64 kAcceptor =          0x0000000000010000ULL;
65// ilabel != olabel for some arc
66const uint64 kNotAcceptor =       0x0000000000020000ULL;
67
68// ilabels unique leaving each state
69const uint64 kIDeterministic =    0x0000000000040000ULL;
70// ilabels not unique leaving some state
71const uint64 kNonIDeterministic = 0x0000000000080000ULL;
72
73// olabels unique leaving each state
74const uint64 kODeterministic =    0x0000000000100000ULL;
75// olabels not unique leaving some state
76const uint64 kNonODeterministic = 0x0000000000200000ULL;
77
78// FST has input/output epsilons
79const uint64 kEpsilons =          0x0000000000400000ULL;
80// FST has no input/output epsilons
81const uint64 kNoEpsilons =        0x0000000000800000ULL;
82
83// FST has input epsilons
84const uint64 kIEpsilons =         0x0000000001000000ULL;
85// FST has no input epsilons
86const uint64 kNoIEpsilons =       0x0000000002000000ULL;
87
88// FST has output epsilons
89const uint64 kOEpsilons =         0x0000000004000000ULL;
90// FST has no output epsilons
91const uint64 kNoOEpsilons =       0x0000000008000000ULL;
92
93// ilabels sorted wrt < for each state
94const uint64 kILabelSorted =      0x0000000010000000ULL;
95// ilabels not sorted wrt < for some state
96const uint64 kNotILabelSorted =   0x0000000020000000ULL;
97
98// olabels sorted wrt < for each state
99const uint64 kOLabelSorted =      0x0000000040000000ULL;
100// olabels not sorted wrt < for some state
101const uint64 kNotOLabelSorted =   0x0000000080000000ULL;
102
103// Non-trivial arc or final weights
104const uint64 kWeighted =          0x0000000100000000ULL;
105// Only trivial arc and final weights
106const uint64 kUnweighted =        0x0000000200000000ULL;
107
108// FST has cycles
109const uint64 kCyclic =            0x0000000400000000ULL;
110// FST has no cycles
111const uint64 kAcyclic =           0x0000000800000000ULL;
112
113// FST has cycles containing the initial state
114const uint64 kInitialCyclic =     0x0000001000000000ULL;
115// FST has no cycles containing the initial state
116const uint64 kInitialAcyclic =    0x0000002000000000ULL;
117
118// FST is topologically sorted
119const uint64 kTopSorted =         0x0000004000000000ULL;
120// FST is not topologically sorted
121const uint64 kNotTopSorted =      0x0000008000000000ULL;
122
123// All states reachable from the initial state
124const uint64 kAccessible =        0x0000010000000000ULL;
125// Not all states reachable from the initial state
126const uint64 kNotAccessible =     0x0000020000000000ULL;
127
128// All states can reach a final state
129const uint64 kCoAccessible =      0x0000040000000000ULL;
130// Not all states can reach a final state
131const uint64 kNotCoAccessible =   0x0000080000000000ULL;
132
133// If NumStates() > 0, then state 0 is initial, state NumStates()-1 is
134// final, there is a transition from each non-final state i to
135// state i+1, and there are no other transitions.
136const uint64 kString =            0x0000100000000000ULL;
137
138// Not a string FST
139const uint64 kNotString =         0x0000200000000000ULL;
140
141//
142// COMPOSITE PROPERTIES
143//
144
145// Properties of an empty machine
146const uint64 kNullProperties
147  = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
148    kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
149    kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
150    kAccessible | kCoAccessible | kString;
151
152// Properties that are preserved when an FST is copied
153const uint64 kCopyProperties
154  = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
155    kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
156    kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
157    kILabelSorted | kNotILabelSorted | kOLabelSorted |
158    kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
159    kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
160    kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
161    kString | kNotString;
162
163// Properites that are intrinsic to the FST
164const uint64 kIntrinsicProperties
165  = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
166    kNonIDeterministic | kODeterministic | kNonODeterministic |
167    kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
168    kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
169    kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
170    kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
171    kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
172    kString | kNotString;
173
174// Properites that are (potentially) extrinsic to the FST
175const uint64 kExtrinsicProperties = kError;
176
177// Properties that are preserved when an FST start state is set
178const uint64 kSetStartProperties
179  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
180    kIDeterministic | kNonIDeterministic | kODeterministic |
181    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
182    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
183    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
184    kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
185    kCoAccessible | kNotCoAccessible;
186
187// Properties that are preserved when an FST final weight is set
188const uint64 kSetFinalProperties
189  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
190    kIDeterministic | kNonIDeterministic | kODeterministic |
191    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
192    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
193    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
194    kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
195    kNotTopSorted | kAccessible | kNotAccessible;
196
197// Properties that are preserved when an FST state is added
198const uint64 kAddStateProperties
199  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
200    kIDeterministic | kNonIDeterministic | kODeterministic |
201    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
202    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
203    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
204    kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
205    kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
206    kNotCoAccessible | kNotString;
207
208// Properties that are preserved when an FST arc is added
209const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
210    kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
211    kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
212    kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;
213
214// Properties that are preserved when an FST arc is set
215const uint64 kSetArcProperties = kExpanded | kMutable | kError;
216
217// Properties that are preserved when FST states are deleted
218const uint64 kDeleteStatesProperties
219  = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
220    kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
221    kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
222    kInitialAcyclic | kTopSorted;
223
224// Properties that are preserved when FST arcs are deleted
225const uint64 kDeleteArcsProperties
226  = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
227    kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
228    kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
229    kInitialAcyclic | kTopSorted |  kNotAccessible | kNotCoAccessible;
230
231// Properties that are preserved when an FST's states are reordered
232const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
233    kNotAcceptor | kIDeterministic | kNonIDeterministic |
234    kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
235    kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
236    kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
237    | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
238    kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
239    kNotCoAccessible;
240
241// Properties that are preserved when an FST's arcs are reordered
242const uint64 kArcSortProperties =
243  kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
244  kNonIDeterministic | kODeterministic | kNonODeterministic |
245  kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
246  kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
247  kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
248  kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
249  kString | kNotString;
250
251// Properties that are preserved when an FST's input labels are changed.
252const uint64 kILabelInvariantProperties =
253  kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
254  kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
255  kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
256  kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
257  kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
258
259// Properties that are preserved when an FST's output labels are changed.
260const uint64 kOLabelInvariantProperties =
261  kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
262  kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
263  kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
264  kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
265  kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;
266
267// Properties that are preserved when an FST's weights are changed.
268// This assumes that the set of states that are non-final is not changed.
269const uint64 kWeightInvariantProperties =
270  kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
271  kNonIDeterministic | kODeterministic | kNonODeterministic |
272  kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
273  kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
274  kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
275  kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
276  kNotCoAccessible | kString | kNotString;
277
278// Properties that are preserved when a superfinal state is added
279// and an FSTs final weights are directed to it via new transitions.
280const uint64 kAddSuperFinalProperties  = kExpanded | kMutable | kError |
281    kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
282    kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
283    kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
284    kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
285    kNotCoAccessible | kNotString;
286
287// Properties that are preserved when a superfinal state is removed
288// and the epsilon transitions directed to it are made final weights.
289const uint64 kRmSuperFinalProperties  = kExpanded | kMutable | kError |
290    kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
291    kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
292    kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
293    kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
294    kNotCoAccessible | kString;
295
296// All binary properties
297const uint64 kBinaryProperties =  0x0000000000000007ULL;
298
299// All trinary properties
300const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;
301
302//
303// COMPUTED PROPERTIES
304//
305
306// 1st bit of trinary properties
307const uint64 kPosTrinaryProperties =
308  kTrinaryProperties & 0x5555555555555555ULL;
309
310// 2nd bit of trinary properties
311const uint64 kNegTrinaryProperties =
312  kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;
313
314// All properties
315const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;
316
317//
318// PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
319//
320
321// Below are functions for getting property bit vectors when executing
322// mutating fst operations.
323inline uint64 SetStartProperties(uint64 inprops);
324template <typename Weight>
325uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
326                          Weight new_weight);
327inline uint64 AddStateProperties(uint64 inprops);
328template <typename A>
329uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
330                           const A *prev_arc);
331inline uint64 DeleteStatesProperties(uint64 inprops);
332inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
333inline uint64 DeleteArcsProperties(uint64 inprops);
334
335uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
336uint64 ComplementProperties(uint64 inprops);
337uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
338uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
339                        bool delayed = false);
340uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
341uint64 FactorWeightProperties(uint64 inprops);
342uint64 InvertProperties(uint64 inprops);
343uint64 ProjectProperties(uint64 inprops, bool project_input);
344uint64 RandGenProperties(uint64 inprops, bool weighted);
345uint64 RelabelProperties(uint64 inprops);
346uint64 ReplaceProperties(const vector<uint64>& inprops,
347                         ssize_t root,
348                         bool epsilon_on_replace,
349                         bool no_empty_fst);
350uint64 ReverseProperties(uint64 inprops);
351uint64 ReweightProperties(uint64 inprops);
352uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
353uint64 ShortestPathProperties(uint64 props);
354uint64 SynchronizeProperties(uint64 inprops);
355uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);
356
357// Definitions of inlined functions.
358
359uint64 SetStartProperties(uint64 inprops) {
360  uint64 outprops = inprops & kSetStartProperties;
361  if (inprops & kAcyclic) {
362    outprops |= kInitialAcyclic;
363  }
364  return outprops;
365}
366
367uint64 AddStateProperties(uint64 inprops) {
368  return inprops & kAddStateProperties;
369}
370
371uint64 DeleteStatesProperties(uint64 inprops) {
372  return inprops & kDeleteStatesProperties;
373}
374
375uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) {
376  uint64 outprops = inprops & kError;
377  return outprops | kNullProperties | staticprops;
378}
379
380uint64 DeleteArcsProperties(uint64 inprops) {
381  return inprops & kDeleteArcsProperties;
382}
383
384// Definitions of template functions.
385
386//
387template <typename Weight>
388uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
389                          Weight new_weight) {
390  uint64 outprops = inprops;
391  if (old_weight != Weight::Zero() && old_weight != Weight::One()) {
392    outprops &= ~kWeighted;
393  }
394  if (new_weight != Weight::Zero() && new_weight != Weight::One()) {
395    outprops |= kWeighted;
396    outprops &= ~kUnweighted;
397  }
398  outprops &= kSetFinalProperties | kWeighted | kUnweighted;
399  return outprops;
400}
401
402/// Gets the properties for the MutableFst::AddArc method.
403///
404/// \param inprops  the current properties of the fst
405/// \param s        the id of the state to which an arc is being added
406/// \param arc      the arc being added to the state with the specified id
407/// \param prev_arc the previously-added (or "last") arc of state s, or NULL if
408///                 s currently has no arcs
409template <typename A>
410uint64 AddArcProperties(uint64 inprops, typename A::StateId s,
411                        const A &arc, const A *prev_arc) {
412  uint64 outprops = inprops;
413  if (arc.ilabel != arc.olabel) {
414    outprops |= kNotAcceptor;
415    outprops &= ~kAcceptor;
416  }
417  if (arc.ilabel == 0) {
418    outprops |= kIEpsilons;
419    outprops &= ~kNoIEpsilons;
420    if (arc.olabel == 0) {
421      outprops |= kEpsilons;
422      outprops &= ~kNoEpsilons;
423    }
424  }
425  if (arc.olabel == 0) {
426    outprops |= kOEpsilons;
427    outprops &= ~kNoOEpsilons;
428  }
429  if (prev_arc != 0) {
430    if (prev_arc->ilabel > arc.ilabel) {
431      outprops |= kNotILabelSorted;
432      outprops &= ~kILabelSorted;
433    }
434    if (prev_arc->olabel > arc.olabel) {
435      outprops |= kNotOLabelSorted;
436      outprops &= ~kOLabelSorted;
437    }
438  }
439  if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) {
440    outprops |= kWeighted;
441    outprops &= ~kUnweighted;
442  }
443  if (arc.nextstate <= s) {
444    outprops |= kNotTopSorted;
445    outprops &= ~kTopSorted;
446  }
447  outprops &= kAddArcProperties | kAcceptor |
448              kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
449              kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted;
450  if (outprops & kTopSorted) {
451    outprops |= kAcyclic | kInitialAcyclic;
452  }
453  return outprops;
454}
455
456extern const char *PropertyNames[];
457
458}  // namespace fst
459
460#endif  // FST_LIB_PROPERTIES_H__
461