1// properties.cc
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: riley@google.com (Michael Riley)
17//
18// \file
19// Functions for updating property bits for various FST operations and
20// string names of the properties.
21
22#include <fst/properties.h>
23
24#include <stddef.h>
25#include <vector>
26using std::vector;
27
28namespace fst {
29
30// These functions determine the properties associated with the FST
31// result of various finite-state operations. The property arguments
32// correspond to the operation's FST arguments. The properties
33// returned assume the operation modifies its first argument.
34// Bitwise-and this result with kCopyProperties for the case when a
35// new (possibly delayed) FST is instead constructed.
36
37// Properties for a concatenatively-closed FST.
38uint64 ClosureProperties(uint64 inprops, bool star, bool delayed) {
39  uint64 outprops = (kError | kAcceptor | kUnweighted | kAccessible) & inprops;
40  if (!delayed)
41       outprops |= (kExpanded | kMutable | kCoAccessible |
42                    kNotTopSorted | kNotString) & inprops;
43  if (!delayed || inprops & kAccessible)
44    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
45                 kNotILabelSorted | kNotOLabelSorted | kWeighted |
46                 kNotAccessible | kNotCoAccessible) & inprops;
47  return outprops;
48}
49
50// Properties for a complemented FST.
51uint64 ComplementProperties(uint64 inprops) {
52  uint64 outprops = kAcceptor | kUnweighted | kNoEpsilons |
53                    kNoIEpsilons | kNoOEpsilons |
54                    kIDeterministic | kODeterministic | kAccessible;
55  outprops |= (kError | kILabelSorted | kOLabelSorted | kInitialCyclic) &
56      inprops;
57  if (inprops & kAccessible)
58    outprops |= kNotILabelSorted | kNotOLabelSorted | kCyclic;
59  return outprops;
60}
61
62// Properties for a composed FST.
63uint64 ComposeProperties(uint64 inprops1, uint64 inprops2) {
64  uint64 outprops = kError & (inprops1 | inprops2);
65  if (inprops1 & kAcceptor && inprops2 & kAcceptor) {
66    outprops |= kAcceptor | kAccessible;
67    outprops |= (kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kAcyclic |
68                 kInitialAcyclic) & inprops1 & inprops2;
69    if (kNoIEpsilons & inprops1 & inprops2)
70      outprops |= (kIDeterministic | kODeterministic) & inprops1 & inprops2;
71  } else {
72    outprops |= kAccessible;
73    outprops |= (kAcceptor | kNoIEpsilons | kAcyclic | kInitialAcyclic) &
74        inprops1 & inprops2;
75    if (kNoIEpsilons & inprops1 & inprops2)
76      outprops |= kIDeterministic & inprops1 & inprops2;
77  }
78  return outprops;
79}
80
81// Properties for a concatenated FST.
82uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
83  uint64 outprops =
84    (kAcceptor | kUnweighted | kAcyclic) & inprops1 & inprops2;
85  outprops |= kError & (inprops1 | inprops2);
86
87  bool empty1 = delayed;  // Can fst1 be the empty machine?
88  bool empty2 = delayed;  // Can fst2 be the empty machine?
89
90  if (!delayed) {
91    outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
92    outprops |= (kNotTopSorted | kNotString) & inprops2;
93  }
94  if (!empty1)
95    outprops |= (kInitialAcyclic | kInitialCyclic) & inprops1;
96  if (!delayed || inprops1 & kAccessible)
97    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
98                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
99                 kNotOLabelSorted | kWeighted | kCyclic |
100                 kNotAccessible | kNotCoAccessible) & inprops1;
101  if ((inprops1 & (kAccessible | kCoAccessible)) ==
102      (kAccessible | kCoAccessible) && !empty1) {
103    outprops |= kAccessible & inprops2;
104    if (!empty2)
105      outprops |= kCoAccessible & inprops2;
106    if (!delayed || inprops2 & kAccessible)
107      outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
108                   kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
109                   kNotOLabelSorted | kWeighted | kCyclic |
110                   kNotAccessible | kNotCoAccessible) & inprops2;
111  }
112  return outprops;
113}
114
115// Properties for a determinized FST.
116uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label) {
117  uint64 outprops = kAccessible;
118  if (((kAcceptor | kNoIEpsilons) & inprops) || has_subsequential_label)
119    outprops |= kIDeterministic;
120  outprops |= (kError | kAcceptor | kNoEpsilons | kAcyclic |
121               kInitialAcyclic | kCoAccessible | kString) & inprops;
122  if (inprops & kAccessible)
123     outprops |= (kNotAcceptor | kEpsilons | kIEpsilons | kOEpsilons |
124                  kCyclic) & inprops;
125  if (inprops & kAcceptor)
126    outprops |= (kNoIEpsilons | kNoOEpsilons) & inprops;
127  if ((inprops & kNoIEpsilons) && has_subsequential_label)
128    outprops |= kNoIEpsilons;
129  return outprops;
130}
131
132// Properties for factored weight FST.
133uint64 FactorWeightProperties(uint64 inprops) {
134  uint64 outprops = (kExpanded | kMutable | kError | kAcceptor |
135                     kAcyclic | kAccessible | kCoAccessible) & inprops;
136  if (inprops & kAccessible)
137    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
138                 kEpsilons | kIEpsilons | kOEpsilons | kCyclic |
139                 kNotILabelSorted | kNotOLabelSorted)
140        & inprops;
141  return outprops;
142}
143
144// Properties for an inverted FST.
145uint64 InvertProperties(uint64 inprops) {
146  uint64 outprops = (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
147                     kEpsilons | kNoEpsilons | kWeighted | kUnweighted |
148                     kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
149                     kTopSorted | kNotTopSorted |
150                     kAccessible | kNotAccessible |
151                     kCoAccessible | kNotCoAccessible |
152                     kString | kNotString) & inprops;
153  if (kIDeterministic & inprops)
154    outprops |= kODeterministic;
155  if (kNonIDeterministic & inprops)
156    outprops |= kNonODeterministic;
157  if (kODeterministic & inprops)
158    outprops |= kIDeterministic;
159  if (kNonODeterministic & inprops)
160    outprops |= kNonIDeterministic;
161
162  if (kIEpsilons & inprops)
163    outprops |= kOEpsilons;
164  if (kNoIEpsilons & inprops)
165    outprops |= kNoOEpsilons;
166  if (kOEpsilons & inprops)
167    outprops |= kIEpsilons;
168  if (kNoOEpsilons & inprops)
169    outprops |= kNoIEpsilons;
170
171  if (kILabelSorted & inprops)
172    outprops |= kOLabelSorted;
173  if (kNotILabelSorted & inprops)
174    outprops |= kNotOLabelSorted;
175  if (kOLabelSorted & inprops)
176    outprops |= kILabelSorted;
177  if (kNotOLabelSorted & inprops)
178    outprops |= kNotILabelSorted;
179  return outprops;
180}
181
182// Properties for a projected FST.
183uint64 ProjectProperties(uint64 inprops, bool project_input) {
184  uint64 outprops = kAcceptor;
185  outprops |= (kExpanded | kMutable | kError | kWeighted | kUnweighted |
186               kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
187               kTopSorted | kNotTopSorted | kAccessible | kNotAccessible |
188               kCoAccessible | kNotCoAccessible |
189               kString | kNotString) & inprops;
190  if (project_input) {
191    outprops |= (kIDeterministic | kNonIDeterministic |
192                 kIEpsilons | kNoIEpsilons |
193                 kILabelSorted | kNotILabelSorted) & inprops;
194
195    if (kIDeterministic & inprops)
196      outprops |= kODeterministic;
197    if (kNonIDeterministic & inprops)
198      outprops |= kNonODeterministic;
199
200    if (kIEpsilons & inprops)
201      outprops |= kOEpsilons | kEpsilons;
202    if (kNoIEpsilons & inprops)
203      outprops |= kNoOEpsilons | kNoEpsilons;
204
205    if (kILabelSorted & inprops)
206      outprops |= kOLabelSorted;
207    if (kNotILabelSorted & inprops)
208      outprops |= kNotOLabelSorted;
209  } else {
210    outprops |= (kODeterministic | kNonODeterministic |
211                 kOEpsilons | kNoOEpsilons |
212                 kOLabelSorted | kNotOLabelSorted) & inprops;
213
214    if (kODeterministic & inprops)
215      outprops |= kIDeterministic;
216    if (kNonODeterministic & inprops)
217      outprops |= kNonIDeterministic;
218
219    if (kOEpsilons & inprops)
220      outprops |= kIEpsilons | kEpsilons;
221    if (kNoOEpsilons & inprops)
222      outprops |= kNoIEpsilons | kNoEpsilons;
223
224    if (kOLabelSorted & inprops)
225      outprops |= kILabelSorted;
226    if (kNotOLabelSorted & inprops)
227      outprops |= kNotILabelSorted;
228  }
229  return outprops;
230}
231
232// Properties for a randgen FST.
233uint64 RandGenProperties(uint64 inprops, bool weighted) {
234  uint64 outprops = kAcyclic | kInitialAcyclic | kAccessible;
235  outprops |= inprops & kError;
236  if (weighted) {
237    outprops |= kTopSorted;
238    outprops |= (kAcceptor | kNoEpsilons |
239                 kNoIEpsilons | kNoOEpsilons |
240                 kIDeterministic | kODeterministic |
241                 kILabelSorted | kOLabelSorted) & inprops;
242  } else {
243    outprops |= kUnweighted;
244    outprops |= (kAcceptor | kILabelSorted | kOLabelSorted) & inprops;
245  }
246  return outprops;
247}
248
249// Properties for a replace FST.
250uint64 ReplaceProperties(const vector<uint64>& inprops,
251                         ssize_t root,
252                         bool epsilon_on_replace,
253                         bool no_empty_fsts) {
254  if (inprops.size() == 0)
255    return kNullProperties;
256  uint64 outprops = 0;
257  for (size_t i = 0; i < inprops.size(); ++i)
258    outprops |= kError & inprops[i];
259  uint64 access_props = no_empty_fsts ? kAccessible | kCoAccessible : 0;
260  for (size_t i = 0; i < inprops.size(); ++i)
261    access_props &= (inprops[i] & (kAccessible | kCoAccessible));
262  if (access_props == (kAccessible | kCoAccessible)) {
263    outprops |= access_props;
264    if (inprops[root] & kInitialCyclic)
265      outprops |= kInitialCyclic;
266    uint64 props =  0;
267    bool string = true;
268    for (size_t i = 0; i < inprops.size(); ++i) {
269      if (epsilon_on_replace == false)
270        props |= kNotAcceptor & inprops[i];
271      props |= (kNonIDeterministic | kNonODeterministic | kEpsilons |
272                kIEpsilons | kOEpsilons | kWeighted | kCyclic |
273                kNotTopSorted | kNotString) & inprops[i];
274      if (!(inprops[i] & kString))
275        string = false;
276    }
277    outprops |= props;
278    if (string)
279      outprops |= kString;
280  }
281  bool acceptor = epsilon_on_replace;
282  bool ideterministic = !epsilon_on_replace;
283  bool no_iepsilons = !epsilon_on_replace;
284  bool acyclic = true;
285  bool unweighted = true;
286  for (size_t i = 0; i < inprops.size(); ++i) {
287    if (!(inprops[i] & kAcceptor))
288      acceptor = false;
289    if (!(inprops[i] & kIDeterministic))
290      ideterministic = false;
291    if (!(inprops[i] & kNoIEpsilons))
292      no_iepsilons = false;
293    if (!(inprops[i] & kAcyclic))
294      acyclic = false;
295    if (!(inprops[i] & kUnweighted))
296      unweighted = false;
297  }
298  if (acceptor)
299    outprops |= kAcceptor;
300  if (ideterministic)
301    outprops |= kIDeterministic;
302  if (no_iepsilons)
303    outprops |= kNoIEpsilons;
304  if (acyclic)
305    outprops |= kAcyclic;
306  if (unweighted)
307    outprops |= kUnweighted;
308  if (inprops[root] & kInitialAcyclic)
309      outprops |= kInitialAcyclic;
310  return outprops;
311}
312
313// Properties for a relabeled FST.
314uint64 RelabelProperties(uint64 inprops) {
315  uint64 outprops = (kExpanded | kMutable | kError |
316                     kWeighted | kUnweighted |
317                     kCyclic | kAcyclic |
318                     kInitialCyclic | kInitialAcyclic |
319                     kTopSorted | kNotTopSorted |
320                     kAccessible | kNotAccessible |
321                     kCoAccessible | kNotCoAccessible |
322                     kString | kNotString) & inprops;
323  return outprops;
324}
325
326// Properties for a reversed FST. (the superinitial state limits this set)
327uint64 ReverseProperties(uint64 inprops) {
328  uint64 outprops =
329    (kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kEpsilons |
330     kIEpsilons | kOEpsilons | kWeighted | kUnweighted |
331     kCyclic | kAcyclic) & inprops;
332  return outprops;
333}
334
335// Properties for re-weighted FST.
336uint64 ReweightProperties(uint64 inprops) {
337  uint64 outprops = inprops & kWeightInvariantProperties;
338  outprops = outprops & ~kCoAccessible;
339  return outprops;
340}
341
342// Properties for an epsilon-removed FST.
343uint64 RmEpsilonProperties(uint64 inprops, bool delayed) {
344  uint64 outprops = kNoEpsilons;
345  outprops |= (kError | kAcceptor | kAcyclic | kInitialAcyclic) & inprops;
346  if (inprops & kAcceptor)
347    outprops |= kNoIEpsilons | kNoOEpsilons;
348  if (!delayed) {
349    outprops |= kExpanded | kMutable;
350    outprops |= kTopSorted & inprops;
351  }
352  if (!delayed || inprops & kAccessible)
353    outprops |= kNotAcceptor & inprops;
354  return outprops;
355}
356
357// Properties for shortest path. This function computes how the properties
358// of the output of shortest path need to be updated, given that 'props' is
359// already known.
360uint64 ShortestPathProperties(uint64 props) {
361  return props | kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
362}
363
364// Properties for a synchronized FST.
365uint64 SynchronizeProperties(uint64 inprops) {
366  uint64 outprops = (kError | kAcceptor | kAcyclic | kAccessible |
367                     kCoAccessible | kUnweighted) & inprops;
368  if (inprops & kAccessible)
369    outprops |= (kCyclic | kNotCoAccessible | kWeighted) & inprops;
370  return outprops;
371}
372
373// Properties for a unioned FST.
374uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed) {
375  uint64 outprops = (kAcceptor | kUnweighted | kAcyclic | kAccessible)
376                    & inprops1 & inprops2;
377  outprops |= kError & (inprops1 | inprops2);
378
379  bool empty1 = delayed;  // Can fst1 be the empty machine?
380  bool empty2 = delayed;  // Can fst2 be the empty machine?
381  if (!delayed) {
382    outprops |= (kExpanded | kMutable | kNotTopSorted | kNotString) & inprops1;
383    outprops |= (kNotTopSorted | kNotString) & inprops2;
384  }
385  if (!empty1 && !empty2) {
386    outprops |= kEpsilons | kIEpsilons | kOEpsilons;
387    outprops |= kCoAccessible & inprops1 & inprops2;
388  }
389  // Note kNotCoAccessible does not hold because of kInitialAcyclic opt.
390  if (!delayed || inprops1 & kAccessible)
391    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
392                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
393                 kNotOLabelSorted | kWeighted | kCyclic |
394                 kNotAccessible) & inprops1;
395  if (!delayed || inprops2 & kAccessible)
396    outprops |= (kNotAcceptor | kNonIDeterministic | kNonODeterministic |
397                 kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted |
398                 kNotOLabelSorted | kWeighted | kCyclic |
399                 kNotAccessible | kNotCoAccessible) & inprops2;
400  return outprops;
401}
402
403
404// Property string names (indexed by bit position).
405const char *PropertyNames[] = {
406  // binary
407  "expanded", "mutable", "error", "", "", "", "", "",
408  "", "", "", "", "", "", "", "",
409  // trinary
410  "acceptor", "not acceptor",
411  "input deterministic", "non input deterministic",
412  "output deterministic", "non output deterministic",
413  "input/output epsilons", "no input/output epsilons",
414  "input epsilons", "no input epsilons",
415  "output epsilons", "no output epsilons",
416  "input label sorted", "not input label sorted",
417  "output label sorted", "not output label sorted",
418  "weighted", "unweighted",
419  "cyclic", "acyclic",
420  "cyclic at initial state", "acyclic at initial state",
421  "top sorted", "not top sorted",
422  "accessible", "not accessible",
423  "coaccessible", "not coaccessible",
424  "string", "not string",
425};
426
427}  // namespace fst
428