1// fst.cc 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// FST definitions. 20 21#include <fst/fst.h> 22 23// Include these so they are registered 24#include <fst/compact-fst.h> 25#include <fst/const-fst.h> 26#include <fst/matcher-fst.h> 27#include <fst/vector-fst.h> 28#include <fst/edit-fst.h> 29 30// FST flag definitions 31 32DEFINE_bool(fst_verify_properties, false, 33 "Verify fst properties queried by TestProperties"); 34 35DEFINE_string(fst_weight_separator, ",", 36 "Character separator between printed composite weights; " 37 "must be a single character"); 38 39DEFINE_string(fst_weight_parentheses, "", 40 "Characters enclosing the first weight of a printed composite " 41 "weight (e.g. pair weight, tuple weight and derived classes) to " 42 "ensure proper I/O of nested composite weights; " 43 "must have size 0 (none) or 2 (open and close parenthesis)"); 44 45DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache"); 46 47DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL, 48 "Cache byte size that triggers garbage collection"); 49 50DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); 51 52DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); 53DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); 54 55DEFINE_string(fst_read_mode, "read", 56 "Default file reading mode for mappable files"); 57 58namespace fst { 59 60// Register VectorFst, ConstFst and EditFst for common arcs types 61REGISTER_FST(VectorFst, StdArc); 62REGISTER_FST(VectorFst, LogArc); 63REGISTER_FST(VectorFst, Log64Arc); 64REGISTER_FST(ConstFst, StdArc); 65REGISTER_FST(ConstFst, LogArc); 66REGISTER_FST(ConstFst, Log64Arc); 67REGISTER_FST(EditFst, StdArc); 68REGISTER_FST(EditFst, LogArc); 69REGISTER_FST(EditFst, Log64Arc); 70 71// Register CompactFst for common arcs with the default (uint32) size type 72static FstRegisterer< 73 CompactFst<StdArc, StringCompactor<StdArc> > > 74CompactFst_StdArc_StringCompactor_registerer; 75static FstRegisterer< 76 CompactFst<LogArc, StringCompactor<LogArc> > > 77CompactFst_LogArc_StringCompactor_registerer; 78static FstRegisterer< 79 CompactFst<StdArc, WeightedStringCompactor<StdArc> > > 80CompactFst_StdArc_WeightedStringCompactor_registerer; 81static FstRegisterer< 82 CompactFst<LogArc, WeightedStringCompactor<LogArc> > > 83CompactFst_LogArc_WeightedStringCompactor_registerer; 84static FstRegisterer< 85 CompactFst<StdArc, AcceptorCompactor<StdArc> > > 86CompactFst_StdArc_AcceptorCompactor_registerer; 87static FstRegisterer< 88 CompactFst<LogArc, AcceptorCompactor<LogArc> > > 89CompactFst_LogArc_AcceptorCompactor_registerer; 90static FstRegisterer< 91 CompactFst<StdArc, UnweightedCompactor<StdArc> > > 92CompactFst_StdArc_UnweightedCompactor_registerer; 93static FstRegisterer< 94 CompactFst<LogArc, UnweightedCompactor<LogArc> > > 95CompactFst_LogArc_UnweightedCompactor_registerer; 96static FstRegisterer< 97 CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > > 98CompactFst_StdArc_UnweightedAcceptorCompactor_registerer; 99static FstRegisterer< 100 CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > > 101CompactFst_LogArc_UnweightedAcceptorCompactor_registerer; 102 103// Fst type definitions for lookahead Fsts. 104extern const char arc_lookahead_fst_type[] = "arc_lookahead"; 105extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead"; 106extern const char olabel_lookahead_fst_type[] = "olabel_lookahead"; 107 108// Identifies stream data as an FST (and its endianity) 109static const int32 kFstMagicNumber = 2125659606; 110 111// Check for Fst magic number in stream, to indicate 112// caller function that the stream content is an Fst header; 113bool IsFstHeader(istream &strm, const string &source) { 114 int64 pos = strm.tellg(); 115 bool match = true; 116 int32 magic_number = 0; 117 ReadType(strm, &magic_number); 118 if (magic_number != kFstMagicNumber 119 ) { 120 match = false; 121 } 122 strm.seekg(pos); 123 return match; 124} 125 126// Check Fst magic number and read in Fst header. 127// If rewind = true, reposition stream to before call (if possible). 128bool FstHeader::Read(istream &strm, const string &source, bool rewind) { 129 int64 pos = 0; 130 if (rewind) pos = strm.tellg(); 131 int32 magic_number = 0; 132 ReadType(strm, &magic_number); 133 if (magic_number != kFstMagicNumber 134 ) { 135 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source; 136 if (rewind) strm.seekg(pos); 137 return false; 138 } 139 140 ReadType(strm, &fsttype_); 141 ReadType(strm, &arctype_); 142 ReadType(strm, &version_); 143 ReadType(strm, &flags_); 144 ReadType(strm, &properties_); 145 ReadType(strm, &start_); 146 ReadType(strm, &numstates_); 147 ReadType(strm, &numarcs_); 148 if (!strm) { 149 LOG(ERROR) << "FstHeader::Read: read failed: " << source; 150 return false; 151 } 152 if (rewind) strm.seekg(pos); 153 return true; 154} 155 156// Write Fst magic number and Fst header. 157bool FstHeader::Write(ostream &strm, const string &source) const { 158 WriteType(strm, kFstMagicNumber); 159 WriteType(strm, fsttype_); 160 WriteType(strm, arctype_); 161 WriteType(strm, version_); 162 WriteType(strm, flags_); 163 WriteType(strm, properties_); 164 WriteType(strm, start_); 165 WriteType(strm, numstates_); 166 WriteType(strm, numarcs_); 167 return true; 168} 169 170FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr, 171 const SymbolTable* isym, const SymbolTable* osym) 172 : source(src), header(hdr), isymbols(isym), osymbols(osym) { 173 mode = ReadMode(FLAGS_fst_read_mode); 174} 175 176FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym, 177 const SymbolTable* osym) 178 : source(src), header(0), isymbols(isym), osymbols(osym) { 179 mode = ReadMode(FLAGS_fst_read_mode); 180} 181 182FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) { 183 if (mode == "read") { 184 return READ; 185 } 186 if (mode == "map") { 187 return MAP; 188 } 189 LOG(ERROR) << "Unknown file read mode " << mode; 190 return READ; 191} 192 193} // namespace fst 194