1// properties.cc
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Functions for updating property bits for various FST operations and
20// string names of the properties.
21
22#include <fst/properties.h>
23
24#include <stddef.h>
25#include <vector>
26using std::vector;
27
28namespace fst {
29
30// These functions determine the properties associated with the FST
31// result of various finite-state operations. The property arguments
32// correspond to the operation's FST arguments. The properties
33// returned assume the operation modifies its first argument.
34// Bitwise-and this result with kCopyProperties for the case when a
35// new (possibly delayed) FST is instead constructed.
36
37// Properties for a concatenatively-closed FST.
38uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39  uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40  if (!delayed)
41       outprops |= (kExpanded | kMutable | kCoAccessible |
42                    kNotTopSorted | kNotString) & inprops;
43  if (!delayed || inprops & kAccessible)
44    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45                 kNotILabelSorted | kNotOLabelSorted | kWeighted |
46                 kNotAccessible | kNotCoAccessible) & inprops;
47  return outprops;
48}
49
50// Properties for a complemented FST.
51uint64 ComplementProperties(uint64 inprops) {
52  uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53                    kNoIEpsilons | kNoOEpsilons |
54                    kIDeterministic | kODeterministic | kAccessible;
55  outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56      inprops;
57  if (inprops & kAccessible)
58    outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59  return outprops;
60}
61
62// Properties for a composed FST.
63uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64  uint64 outprops = kError & (inprops1 | inprops2);
65  if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66    outprops |= kAcceptor | kAccessible;
67    outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68                 kInitialAcyclic) & inprops1 & inprops2;
69    if (kNoIEpsilons & inprops1 & inprops2)
70      outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71  } else {
72    outprops |= kAccessible;
73    outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74        inprops1 & inprops2;
75    if (kNoIEpsilons & inprops1 & inprops2)
76      outprops |= kIDeterministic & inprops1 & inprops2;
77  }
78  return outprops;
79}
80
81// Properties for a concatenated FST.
82uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83  uint64 outprops =
84    (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85  outprops |= kError & (inprops1 | inprops2);
86
87  bool empty1 = delayed;  // Can fst1 be the empty machine?
88  bool empty2 = delayed;  // Can fst2 be the empty machine?
89
90  if (!delayed) {
91    outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92    outprops |= (kNotTopSorted | kNotString) & inprops2;
93  }
94  if (!empty1)
95    outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96  if (!delayed || inprops1 & kAccessible)
97    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99                 kNotOLabelSorted | kWeighted | kCyclic |
100                 kNotAccessible | kNotCoAccessible) & inprops1;
101  if ((inprops1 & (kAccessible | kCoAccessible)) ==
102      (kAccessible | kCoAccessible) && !empty1) {
103    outprops |= kAccessible & inprops2;
104    if (!empty2)
105      outprops |= kCoAccessible & inprops2;
106    if (!delayed || inprops2 & kAccessible)
107      outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108                   kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109                   kNotOLabelSorted | kWeighted | kCyclic |
110                   kNotAccessible | kNotCoAccessible) & inprops2;
111  }
112  return outprops;
113}
114
115// Properties for a determinized FST.
116uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117  uint64 outprops = kAccessible;
118  if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119    outprops |= kIDeterministic;
120  outprops |= (kError | kAcceptor | kAcyclic |
121               kInitialAcyclic | kCoAccessible | kString) & inprops;
122  if (inprops & kNoIEpsilons)
123    outprops |= kNoEpsilons & inprops;
124  if (inprops & kAccessible)
125     outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
126                  kCyclic) & inprops;
127  if (inprops & kAcceptor)
128    outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
129  if ((inprops & kNoIEpsilons) && has_subsequential_label)
130    outprops |= kNoIEpsilons;
131  return outprops;
132}
133
134// Properties for factored weight FST.
135uint64 FactorWeightProperties(uint64 inprops) {
136  uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
137                     kAcyclic | kAccessible | kCoAccessible) & inprops;
138  if (inprops & kAccessible)
139    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
140                 kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
141                 kNotILabelSorted | kNotOLabelSorted)
142        & inprops;
143  return outprops;
144}
145
146// Properties for an inverted FST.
147uint64 InvertProperties(uint64 inprops) {
148  uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
149                     kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
150                     kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
151                     kTopSorted | kNotTopSorted |
152                     kAccessible | kNotAccessible |
153                     kCoAccessible | kNotCoAccessible |
154                     kString | kNotString) & inprops;
155  if (kIDeterministic & inprops)
156    outprops |= kODeterministic;
157  if (kNonIDeterministic & inprops)
158    outprops |= kNonODeterministic;
159  if (kODeterministic & inprops)
160    outprops |= kIDeterministic;
161  if (kNonODeterministic & inprops)
162    outprops |= kNonIDeterministic;
163
164  if (kIEpsilons & inprops)
165    outprops |= kOEpsilons;
166  if (kNoIEpsilons & inprops)
167    outprops |= kNoOEpsilons;
168  if (kOEpsilons & inprops)
169    outprops |= kIEpsilons;
170  if (kNoOEpsilons & inprops)
171    outprops |= kNoIEpsilons;
172
173  if (kILabelSorted & inprops)
174    outprops |= kOLabelSorted;
175  if (kNotILabelSorted & inprops)
176    outprops |= kNotOLabelSorted;
177  if (kOLabelSorted & inprops)
178    outprops |= kILabelSorted;
179  if (kNotOLabelSorted & inprops)
180    outprops |= kNotILabelSorted;
181  return outprops;
182}
183
184// Properties for a projected FST.
185uint64 ProjectProperties(uint64 inprops, bool project_input) {
186  uint64 outprops = kAcceptor;
187  outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
188               kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
189               kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
190               kCoAccessible | kNotCoAccessible |
191               kString | kNotString) & inprops;
192  if (project_input) {
193    outprops |= (kIDeterministic | kNonIDeterministic |
194                 kIEpsilons | kNoIEpsilons |
195                 kILabelSorted | kNotILabelSorted) & inprops;
196
197    if (kIDeterministic & inprops)
198      outprops |= kODeterministic;
199    if (kNonIDeterministic & inprops)
200      outprops |= kNonODeterministic;
201
202    if (kIEpsilons & inprops)
203      outprops |= kOEpsilons | kEpsilons;
204    if (kNoIEpsilons & inprops)
205      outprops |= kNoOEpsilons | kNoEpsilons;
206
207    if (kILabelSorted & inprops)
208      outprops |= kOLabelSorted;
209    if (kNotILabelSorted & inprops)
210      outprops |= kNotOLabelSorted;
211  } else {
212    outprops |= (kODeterministic | kNonODeterministic |
213                 kOEpsilons | kNoOEpsilons |
214                 kOLabelSorted | kNotOLabelSorted) & inprops;
215
216    if (kODeterministic & inprops)
217      outprops |= kIDeterministic;
218    if (kNonODeterministic & inprops)
219      outprops |= kNonIDeterministic;
220
221    if (kOEpsilons & inprops)
222      outprops |= kIEpsilons | kEpsilons;
223    if (kNoOEpsilons & inprops)
224      outprops |= kNoIEpsilons | kNoEpsilons;
225
226    if (kOLabelSorted & inprops)
227      outprops |= kILabelSorted;
228    if (kNotOLabelSorted & inprops)
229      outprops |= kNotILabelSorted;
230  }
231  return outprops;
232}
233
234// Properties for a randgen FST.
235uint64 RandGenProperties(uint64 inprops, bool weighted) {
236  uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
237  outprops |= inprops & kError;
238  if (weighted) {
239    outprops |= kTopSorted;
240    outprops |= (kAcceptor | kNoEpsilons |
241                 kNoIEpsilons | kNoOEpsilons |
242                 kIDeterministic | kODeterministic |
243                 kILabelSorted | kOLabelSorted) & inprops;
244  } else {
245    outprops |= kUnweighted;
246    outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
247  }
248  return outprops;
249}
250
251// Properties for a replace FST.
252uint64 ReplaceProperties(const vector<uint64>& inprops,
253                         ssize_t root,
254                         bool epsilon_on_replace,
255                         bool no_empty_fsts) {
256  if (inprops.size() == 0)
257    return kNullProperties;
258  uint64 outprops = 0;
259  for (size_t i = 0; i < inprops.size(); ++i)
260    outprops |= kError & inprops[i];
261  uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
262  for (size_t i = 0; i < inprops.size(); ++i)
263    access_props &= (inprops[i] & (kAccessible | kCoAccessible));
264  if (access_props == (kAccessible | kCoAccessible)) {
265    outprops |= access_props;
266    if (inprops[root] & kInitialCyclic)
267      outprops |= kInitialCyclic;
268    uint64 props =  0;
269    bool string = true;
270    for (size_t i = 0; i < inprops.size(); ++i) {
271      if (epsilon_on_replace == false)
272        props |= kNotAcceptor & inprops[i];
273      props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
274                kIEpsilons | kOEpsilons | kWeighted | kCyclic |
275                kNotTopSorted | kNotString) & inprops[i];
276      if (!(inprops[i] & kString))
277        string = false;
278    }
279    outprops |= props;
280    if (string)
281      outprops |= kString;
282  }
283  bool acceptor = epsilon_on_replace;
284  bool ideterministic = !epsilon_on_replace;
285  bool no_iepsilons = !epsilon_on_replace;
286  bool acyclic = true;
287  bool unweighted = true;
288  for (size_t i = 0; i < inprops.size(); ++i) {
289    if (!(inprops[i] & kAcceptor))
290      acceptor = false;
291    if (!(inprops[i] & kIDeterministic))
292      ideterministic = false;
293    if (!(inprops[i] & kNoIEpsilons))
294      no_iepsilons = false;
295    if (!(inprops[i] & kAcyclic))
296      acyclic = false;
297    if (!(inprops[i] & kUnweighted))
298      unweighted = false;
299  }
300  if (acceptor)
301    outprops |= kAcceptor;
302  if (ideterministic)
303    outprops |= kIDeterministic;
304  if (no_iepsilons)
305    outprops |= kNoIEpsilons;
306  if (acyclic)
307    outprops |= kAcyclic;
308  if (unweighted)
309    outprops |= kUnweighted;
310  if (inprops[root] & kInitialAcyclic)
311      outprops |= kInitialAcyclic;
312  return outprops;
313}
314
315// Properties for a relabeled FST.
316uint64 RelabelProperties(uint64 inprops) {
317  uint64 outprops = (kExpanded | kMutable | kError |
318                     kWeighted | kUnweighted |
319                     kCyclic | kAcyclic |
320                     kInitialCyclic | kInitialAcyclic |
321                     kTopSorted | kNotTopSorted |
322                     kAccessible | kNotAccessible |
323                     kCoAccessible | kNotCoAccessible |
324                     kString | kNotString) & inprops;
325  return outprops;
326}
327
328// Properties for a reversed FST. (the superinitial state limits this set)
329uint64 ReverseProperties(uint64 inprops) {
330  uint64 outprops =
331    (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
332     kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
333     kCyclic | kAcyclic) & inprops;
334  return outprops;
335}
336
337// Properties for re-weighted FST.
338uint64 ReweightProperties(uint64 inprops) {
339  uint64 outprops = inprops & kWeightInvariantProperties;
340  outprops = outprops & ~kCoAccessible;
341  return outprops;
342}
343
344// Properties for an epsilon-removed FST.
345uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
346  uint64 outprops = kNoEpsilons;
347  outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
348  if (inprops & kAcceptor)
349    outprops |= kNoIEpsilons | kNoOEpsilons;
350  if (!delayed) {
351    outprops |= kExpanded | kMutable;
352    outprops |= kTopSorted & inprops;
353  }
354  if (!delayed || inprops & kAccessible)
355    outprops |= kNotAcceptor & inprops;
356  return outprops;
357}
358
359// Properties for shortest path. This function computes how the properties
360// of the output of shortest path need to be updated, given that 'props' is
361// already known.
362uint64 ShortestPathProperties(uint64 props) {
363  return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
364}
365
366// Properties for a synchronized FST.
367uint64 SynchronizeProperties(uint64 inprops) {
368  uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
369                     kCoAccessible | kUnweighted) & inprops;
370  if (inprops & kAccessible)
371    outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
372  return outprops;
373}
374
375// Properties for a unioned FST.
376uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
377  uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
378                    & inprops1 & inprops2;
379  outprops |= kError & (inprops1 | inprops2);
380  outprops |= kInitialAcyclic;
381
382  bool empty1 = delayed;  // Can fst1 be the empty machine?
383  bool empty2 = delayed;  // Can fst2 be the empty machine?
384  if (!delayed) {
385    outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
386    outprops |= (kNotTopSorted | kNotString) & inprops2;
387  }
388  if (!empty1 && !empty2) {
389    outprops |= kEpsilons | kIEpsilons | kOEpsilons;
390    outprops |= kCoAccessible & inprops1 & inprops2;
391  }
392  // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
393  if (!delayed || inprops1 & kAccessible)
394    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
395                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
396                 kNotOLabelSorted | kWeighted | kCyclic |
397                 kNotAccessible) & inprops1;
398  if (!delayed || inprops2 & kAccessible)
399    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
400                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
401                 kNotOLabelSorted | kWeighted | kCyclic |
402                 kNotAccessible | kNotCoAccessible) & inprops2;
403  return outprops;
404}
405
406
407// Property string names (indexed by bit position).
408const char *PropertyNames[] = {
409  // binary
410  "expanded", "mutable", "error", "", "", "", "", "",
411  "", "", "", "", "", "", "", "",
412  // trinary
413  "acceptor", "not acceptor",
414  "input deterministic", "non input deterministic",
415  "output deterministic", "non output deterministic",
416  "input/output epsilons", "no input/output epsilons",
417  "input epsilons", "no input epsilons",
418  "output epsilons", "no output epsilons",
419  "input label sorted", "not input label sorted",
420  "output label sorted", "not output label sorted",
421  "weighted", "unweighted",
422  "cyclic", "acyclic",
423  "cyclic at initial state", "acyclic at initial state",
424  "top sorted", "not top sorted",
425  "accessible", "not accessible",
426  "coaccessible", "not coaccessible",
427  "string", "not string",
428};
429
430}  // namespace fst
431