const-fst.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// const-fst.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// Simple concrete immutable FST whose states and arcs are each stored 20// in single arrays. 21 22#ifndef FST_LIB_CONST_FST_H__ 23#define FST_LIB_CONST_FST_H__ 24 25#include <string> 26#include <vector> 27using std::vector; 28 29#include <fst/expanded-fst.h> 30#include <fst/fst-decl.h> // For optional argument declarations 31#include <fst/test-properties.h> 32#include <fst/util.h> 33 34 35namespace fst { 36 37template <class A, class U> class ConstFst; 38template <class F, class G> void Cast(const F &, G *); 39 40// States and arcs each implemented by single arrays, templated on the 41// Arc definition. The unsigned type U is used to represent indices into 42// the arc array. 43template <class A, class U> 44class ConstFstImpl : public FstImpl<A> { 45 public: 46 using FstImpl<A>::SetInputSymbols; 47 using FstImpl<A>::SetOutputSymbols; 48 using FstImpl<A>::SetType; 49 using FstImpl<A>::SetProperties; 50 using FstImpl<A>::Properties; 51 52 typedef A Arc; 53 typedef typename A::Weight Weight; 54 typedef typename A::StateId StateId; 55 typedef U Unsigned; 56 57 ConstFstImpl() 58 : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) { 59 string type = "const"; 60 if (sizeof(U) != sizeof(uint32)) { 61 string size; 62 Int64ToStr(8 * sizeof(U), &size); 63 type += size; 64 } 65 SetType(type); 66 SetProperties(kNullProperties | kStaticProperties); 67 } 68 69 explicit ConstFstImpl(const Fst<A> &fst); 70 71 ~ConstFstImpl() { 72 delete[] states_; 73 delete[] arcs_; 74 } 75 76 StateId Start() const { return start_; } 77 78 Weight Final(StateId s) const { return states_[s].final; } 79 80 StateId NumStates() const { return nstates_; } 81 82 size_t NumArcs(StateId s) const { return states_[s].narcs; } 83 84 size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; } 85 86 size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; } 87 88 static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts); 89 90 bool Write(ostream &strm, const FstWriteOptions &opts) const; 91 92 A *Arcs(StateId s) { return arcs_ + states_[s].pos; } 93 94 // Provide information needed for generic state iterator 95 void InitStateIterator(StateIteratorData<A> *data) const { 96 data->base = 0; 97 data->nstates = nstates_; 98 } 99 100 // Provide information needed for the generic arc iterator 101 void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { 102 data->base = 0; 103 data->arcs = arcs_ + states_[s].pos; 104 data->narcs = states_[s].narcs; 105 data->ref_count = 0; 106 } 107 108 private: 109 friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write 110 111 // States implemented by array *states_ below, arcs by (single) *arcs_. 112 struct State { 113 Weight final; // Final weight 114 Unsigned pos; // Start of state's arcs in *arcs_ 115 Unsigned narcs; // Number of arcs (per state) 116 Unsigned niepsilons; // # of input epsilons 117 Unsigned noepsilons; // # of output epsilons 118 State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} 119 }; 120 121 // Properties always true of this Fst class 122 static const uint64 kStaticProperties = kExpanded; 123 // Current unaligned file format version. The unaligned version was added and 124 // made the default since the aligned version does not work on pipes. 125 static const int kFileVersion = 2; 126 // Current aligned file format version 127 static const int kAlignedFileVersion = 1; 128 // Minimum file format version supported 129 static const int kMinFileVersion = 1; 130 // Byte alignment for states and arcs in file format (version 1 only) 131 static const int kFileAlign = 16; 132 133 State *states_; // States represenation 134 A *arcs_; // Arcs representation 135 StateId nstates_; // Number of states 136 size_t narcs_; // Number of arcs (per FST) 137 StateId start_; // Initial state 138 139 DISALLOW_COPY_AND_ASSIGN(ConstFstImpl); 140}; 141 142template <class A, class U> 143const uint64 ConstFstImpl<A, U>::kStaticProperties; 144template <class A, class U> 145const int ConstFstImpl<A, U>::kFileVersion; 146template <class A, class U> 147const int ConstFstImpl<A, U>::kAlignedFileVersion; 148template <class A, class U> 149const int ConstFstImpl<A, U>::kMinFileVersion; 150template <class A, class U> 151const int ConstFstImpl<A, U>::kFileAlign; 152 153 154template<class A, class U> 155ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) { 156 string type = "const"; 157 if (sizeof(U) != sizeof(uint32)) { 158 string size; 159 Int64ToStr(sizeof(U) * 8, &size); 160 type += size; 161 } 162 SetType(type); 163 SetInputSymbols(fst.InputSymbols()); 164 SetOutputSymbols(fst.OutputSymbols()); 165 start_ = fst.Start(); 166 167 // Count # of states and arcs. 168 for (StateIterator< Fst<A> > siter(fst); 169 !siter.Done(); 170 siter.Next()) { 171 ++nstates_; 172 StateId s = siter.Value(); 173 for (ArcIterator< Fst<A> > aiter(fst, s); 174 !aiter.Done(); 175 aiter.Next()) 176 ++narcs_; 177 } 178 states_ = new State[nstates_]; 179 arcs_ = new A[narcs_]; 180 size_t pos = 0; 181 for (StateId s = 0; s < nstates_; ++s) { 182 states_[s].final = fst.Final(s); 183 states_[s].pos = pos; 184 states_[s].narcs = 0; 185 states_[s].niepsilons = 0; 186 states_[s].noepsilons = 0; 187 for (ArcIterator< Fst<A> > aiter(fst, s); 188 !aiter.Done(); 189 aiter.Next()) { 190 const A &arc = aiter.Value(); 191 ++states_[s].narcs; 192 if (arc.ilabel == 0) 193 ++states_[s].niepsilons; 194 if (arc.olabel == 0) 195 ++states_[s].noepsilons; 196 arcs_[pos++] = arc; 197 } 198 } 199 SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties); 200} 201 202 203template<class A, class U> 204ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm, 205 const FstReadOptions &opts) { 206 ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>; 207 FstHeader hdr; 208 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { 209 delete impl; 210 return 0; 211 } 212 impl->start_ = hdr.Start(); 213 impl->nstates_ = hdr.NumStates(); 214 impl->narcs_ = hdr.NumArcs(); 215 impl->states_ = new State[impl->nstates_]; 216 impl->arcs_ = new A[impl->narcs_]; 217 218 // Ensures compatibility 219 if (hdr.Version() == kAlignedFileVersion) 220 hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); 221 222 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && 223 !AlignInput(strm, kFileAlign)) { 224 LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; 225 delete impl; 226 return 0; 227 } 228 size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State); 229 strm.read(reinterpret_cast<char *>(impl->states_), b); 230 if (!strm) { 231 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; 232 delete impl; 233 return 0; 234 } 235 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && 236 !AlignInput(strm, kFileAlign)) { 237 LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; 238 delete impl; 239 return 0; 240 } 241 b = impl->narcs_ * sizeof(A); 242 strm.read(reinterpret_cast<char *>(impl->arcs_), b); 243 if (!strm) { 244 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; 245 delete impl; 246 return 0; 247 } 248 return impl; 249} 250 251// Simple concrete immutable FST. This class attaches interface to 252// implementation and handles reference counting, delegating most 253// methods to ImplToExpandedFst. The unsigned type U is used to 254// represent indices into the arc array (uint32 by default, declared 255// in fst-decl.h). 256template <class A, class U> 257class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > { 258 public: 259 friend class StateIterator< ConstFst<A, U> >; 260 friend class ArcIterator< ConstFst<A, U> >; 261 template <class F, class G> void friend Cast(const F &, G *); 262 263 typedef A Arc; 264 typedef typename A::StateId StateId; 265 typedef ConstFstImpl<A, U> Impl; 266 typedef U Unsigned; 267 268 ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {} 269 270 explicit ConstFst(const Fst<A> &fst) 271 : ImplToExpandedFst<Impl>(new Impl(fst)) {} 272 273 ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {} 274 275 // Get a copy of this ConstFst. See Fst<>::Copy() for further doc. 276 virtual ConstFst<A, U> *Copy(bool safe = false) const { 277 return new ConstFst<A, U>(*this); 278 } 279 280 // Read a ConstFst from an input stream; return NULL on error 281 static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) { 282 Impl* impl = Impl::Read(strm, opts); 283 return impl ? new ConstFst<A, U>(impl) : 0; 284 } 285 286 // Read a ConstFst from a file; return NULL on error 287 // Empty filename reads from standard input 288 static ConstFst<A, U> *Read(const string &filename) { 289 Impl* impl = ImplToExpandedFst<Impl>::Read(filename); 290 return impl ? new ConstFst<A, U>(impl) : 0; 291 } 292 293 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { 294 return WriteFst(*this, strm, opts); 295 } 296 297 virtual bool Write(const string &filename) const { 298 return Fst<A>::WriteFile(filename); 299 } 300 301 template <class F> 302 static bool WriteFst(const F &fst, ostream &strm, 303 const FstWriteOptions &opts); 304 305 virtual void InitStateIterator(StateIteratorData<Arc> *data) const { 306 GetImpl()->InitStateIterator(data); 307 } 308 309 virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { 310 GetImpl()->InitArcIterator(s, data); 311 } 312 313 private: 314 explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} 315 316 // Makes visible to friends. 317 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } 318 319 void SetImpl(Impl *impl, bool own_impl = true) { 320 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); 321 } 322 323 void operator=(const ConstFst<A, U> &fst); // disallow 324}; 325 326// Writes Fst in Const format, potentially with a pass over the machine 327// before writing to compute number of states and arcs. 328// 329template <class A, class U> 330template <class F> 331bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm, 332 const FstWriteOptions &opts) { 333 static const int kFileVersion = 2; 334 static const int kAlignedFileVersion = 1; 335 static const int kFileAlign = 16; 336 int file_version = opts.align ? kAlignedFileVersion : kFileVersion; 337 size_t num_arcs = -1, num_states = -1; 338 size_t start_offset = 0; 339 bool update_header = true; 340 if (fst.Type() == ConstFst<A, U>().Type()) { 341 const ConstFst<A, U> *const_fst = static_cast<const ConstFst<A, U> *>(&fst); 342 num_arcs = const_fst->GetImpl()->narcs_; 343 num_states = const_fst->GetImpl()->nstates_; 344 update_header = false; 345 } else if ((start_offset = strm.tellp()) == -1) { 346 // precompute values needed for header when we cannot seek to rewrite it. 347 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 348 num_arcs += fst.NumArcs(siter.Value()); 349 num_states++; 350 } 351 update_header = false; 352 } 353 FstHeader hdr; 354 hdr.SetStart(fst.Start()); 355 hdr.SetNumStates(num_states); 356 hdr.SetNumArcs(num_arcs); 357 string type = "const"; 358 if (sizeof(U) != sizeof(uint32)) { 359 string size; 360 Int64ToStr(8 * sizeof(U), &size); 361 type += size; 362 } 363 FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, &hdr); 364 if (opts.align && !AlignOutput(strm, kFileAlign)) { 365 LOG(ERROR) << "Could not align file during write after header"; 366 return false; 367 } 368 size_t pos = 0, states = 0; 369 typename ConstFstImpl<A, U>::State state; 370 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 371 state.final = fst.Final(siter.Value()); 372 state.pos = pos; 373 state.narcs = fst.NumArcs(siter.Value()); 374 state.niepsilons = fst.NumInputEpsilons(siter.Value()); 375 state.noepsilons = fst.NumOutputEpsilons(siter.Value()); 376 strm.write(reinterpret_cast<const char *>(&state), sizeof(state)); 377 pos += state.narcs; 378 states++; 379 } 380 hdr.SetNumStates(states); 381 hdr.SetNumArcs(pos); 382 if (opts.align && !AlignOutput(strm, kFileAlign)) { 383 LOG(ERROR) << "Could not align file during write after writing states"; 384 } 385 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { 386 StateId s = siter.Value(); 387 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { 388 const A &arc = aiter.Value(); 389 strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc)); 390 } 391 } 392 strm.flush(); 393 if (!strm) { 394 LOG(ERROR) << "WriteAsVectorFst write failed: " << opts.source; 395 return false; 396 } 397 if (update_header) { 398 return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type, 399 &hdr, start_offset); 400 } else { 401 if (hdr.NumStates() != num_states) { 402 LOG(ERROR) << "Inconsistent number of states observed during write"; 403 return false; 404 } 405 if (hdr.NumArcs() != num_arcs) { 406 LOG(ERROR) << "Inconsistent number of arcs observed during write"; 407 return false; 408 } 409 } 410 return true; 411} 412 413// Specialization for ConstFst; see generic version in fst.h 414// for sample usage (but use the ConstFst type!). This version 415// should inline. 416template <class A, class U> 417class StateIterator< ConstFst<A, U> > { 418 public: 419 typedef typename A::StateId StateId; 420 421 explicit StateIterator(const ConstFst<A, U> &fst) 422 : nstates_(fst.GetImpl()->NumStates()), s_(0) {} 423 424 bool Done() const { return s_ >= nstates_; } 425 426 StateId Value() const { return s_; } 427 428 void Next() { ++s_; } 429 430 void Reset() { s_ = 0; } 431 432 private: 433 StateId nstates_; 434 StateId s_; 435 436 DISALLOW_COPY_AND_ASSIGN(StateIterator); 437}; 438 439 440// Specialization for ConstFst; see generic version in fst.h 441// for sample usage (but use the ConstFst type!). This version 442// should inline. 443template <class A, class U> 444class ArcIterator< ConstFst<A, U> > { 445 public: 446 typedef typename A::StateId StateId; 447 448 ArcIterator(const ConstFst<A, U> &fst, StateId s) 449 : arcs_(fst.GetImpl()->Arcs(s)), 450 narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {} 451 452 bool Done() const { return i_ >= narcs_; } 453 454 const A& Value() const { return arcs_[i_]; } 455 456 void Next() { ++i_; } 457 458 size_t Position() const { return i_; } 459 460 void Reset() { i_ = 0; } 461 462 void Seek(size_t a) { i_ = a; } 463 464 uint32 Flags() const { 465 return kArcValueFlags; 466 } 467 468 void SetFlags(uint32 f, uint32 m) {} 469 470 private: 471 const A *arcs_; 472 size_t narcs_; 473 size_t i_; 474 475 DISALLOW_COPY_AND_ASSIGN(ArcIterator); 476}; 477 478// A useful alias when using StdArc. 479typedef ConstFst<StdArc> StdConstFst; 480 481} // namespace fst 482 483#endif // FST_LIB_CONST_FST_H__ 484