1f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// verify.h
2f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
3f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Licensed under the Apache License, Version 2.0 (the "License");
4f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// you may not use this file except in compliance with the License.
5f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// You may obtain a copy of the License at
6f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
7f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//     http://www.apache.org/licenses/LICENSE-2.0
8f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
9f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Unless required by applicable law or agreed to in writing, software
10f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// distributed under the License is distributed on an "AS IS" BASIS,
11f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// See the License for the specific language governing permissions and
13f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// limitations under the License.
14f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
15f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Copyright 2005-2010 Google, Inc.
16f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Author: riley@google.com (Michael Riley)
17f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson//
18f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// \file
19f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Function to verify an Fst's contents
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#ifndef FST_LIB_VERIFY_H__
22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#define FST_LIB_VERIFY_H__
23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
24f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/fst.h>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/test-properties.h>
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
30f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Verifies that an Fst's contents are sane.
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsontemplate<class Arc>
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonbool Verify(const Fst<Arc> &fst, bool allow_negative_labels = false) {
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Label Label;
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::Weight Weight;
35f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  typedef typename Arc::StateId StateId;
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId start = fst.Start();
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *isyms = fst.InputSymbols();
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const SymbolTable *osyms = fst.OutputSymbols();
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  // Count states
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  StateId ns = 0;
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateIterator< Fst<Arc> > siter(fst);
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !siter.Done();
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       siter.Next())
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    ++ns;
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (start == kNoStateId && ns > 0) {
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Verify: Fst start state ID unset";
50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else if (start >= ns) {
52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Verify: Fst start state ID exceeds number of states";
53f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  for (StateIterator< Fst<Arc> > siter(fst);
57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       !siter.Done();
58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson       siter.Next()) {
59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    StateId s = siter.Value();
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    size_t na = 0;
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    for (ArcIterator< Fst<Arc> > aiter(fst, s);
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         !aiter.Done();
63f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson         aiter.Next()) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      const Arc &arc =aiter.Value();
65f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      if (!allow_negative_labels && arc.ilabel < 0) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst input label ID of arc at position "
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << na << " of state " << s << " is negative";
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (isyms && isyms->Find(arc.ilabel) == "") {
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst input label ID " << arc.ilabel
71f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " of arc at position " << na << " of state " <<  s
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " is missing from input symbol table \""
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << isyms->Name() << "\"";
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (!allow_negative_labels && arc.olabel < 0) {
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst output label ID of arc at position "
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << na << " of state " << s << " is negative";
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
79f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (osyms && osyms->Find(arc.olabel) == "") {
80f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst output label ID " << arc.olabel
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " of arc at position " << na << " of state " <<  s
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " is missing from output symbol table \""
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << osyms->Name() << "\"";
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (!arc.weight.Member() || arc.weight == Weight::Zero()) {
86f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst weight of arc at position "
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << na << " of state " << s << " is invalid";
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (arc.nextstate < 0) {
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << na << " of state " << s << " is negative";
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      } else if (arc.nextstate >= ns) {
94f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        LOG(ERROR) << "Verify: Fst destination state ID of arc at position "
95f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << na << " of state " << s
96f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                   << " exceeds number of states";
97f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson        return false;
98f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      }
99f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      ++na;
100f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
101f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (!fst.Final(s).Member()) {
102f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "Verify: Fst final weight of state " << s << " is invalid";
103f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
104f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
105f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
106f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 fst_props = fst.Properties(kFstProperties, false);
107f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (fst_props & kError) {
108f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Verify: Fst error property is set";
109f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
110f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
111f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
112f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 known_props;
113f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  uint64 test_props = ComputeProperties(fst, kFstProperties, &known_props,
114f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson                                        false);
115f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (!CompatProperties(fst_props, test_props)) {
116f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    LOG(ERROR) << "Verify: stored Fst properties incorrect "
117f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "(props1 = stored props, props2 = tested)";
118f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return false;
119f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  } else {
120f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return true;
121f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
122f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
123f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
124f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
125f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
126f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#endif  // FST_LIB_VERIFY_H__
127