util.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// util.h 258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Licensed under the Apache License, Version 2.0 (the "License"); 458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// you may not use this file except in compliance with the License. 558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// You may obtain a copy of the License at 658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// http://www.apache.org/licenses/LICENSE-2.0 858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 9116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// Unless required by applicable law or agreed to in writing, software 1058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// distributed under the License is distributed on an "AS IS" BASIS, 1158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// See the License for the specific language governing permissions and 1358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// limitations under the License. 1458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 1558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Copyright 2005-2010 Google, Inc. 1658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Author: riley@google.com (Michael Riley) 1758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 18a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// \file 195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// FST utility inline definitions. 205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#ifndef FST_LIB_UTIL_H__ 225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#define FST_LIB_UTIL_H__ 235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include <unordered_map> 255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)using std::tr1::unordered_map; 26a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochusing std::tr1::unordered_multimap; 275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include <unordered_set> 285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)using std::tr1::unordered_set; 2958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)using std::tr1::unordered_multiset; 3058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <list> 3158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <map> 3258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <set> 3358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <sstream> 3458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <string> 355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include <vector> 365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)using std::vector; 375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 39a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch#include <fst/compat.h> 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include <fst/types.h> 415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include <iostream> 4358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#include <fstream> 4458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 4558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 4658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// UTILITY FOR ERROR HANDLING 4758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// 4858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 4958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)DECLARE_bool(fst_error_fatal); 50a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch 515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) 525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace fst { 545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// 565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// UTILITIES FOR TYPE I/O 575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// 5858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 59a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// Read some types from an input stream. 60010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) 61010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)// Generic case. 62010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)template <typename T> 6358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)inline istream &ReadType(istream &strm, T *t) { 6458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return t->Read(strm); 6558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)} 6658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 6758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Fixed size, contiguous memory read. 6858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)#define READ_POD_TYPE(T) \ 6958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)inline istream &ReadType(istream &strm, T *t) { \ 7058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ 7158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)} 7258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 7358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(bool); 7458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(char); 7558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(signed char); 7658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(unsigned char); 7758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(short); 7858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(unsigned short); 7958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(int); 8058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(unsigned int); 8158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(long); 8258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(unsigned long); 8358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(long long); 8458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(unsigned long long); 8558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)READ_POD_TYPE(float); 86a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochREAD_POD_TYPE(double); 8758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 88a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch// String case. 8958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)inline istream &ReadType(istream &strm, string *s) { 9058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) s->clear(); 91a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch int32 ns = 0; 9258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); 935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for (int i = 0; i < ns; ++i) { 945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) char c; 955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) strm.read(&c, 1); 965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) *s += c; 97116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch } 98116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return strm; 99116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch} 100116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 101116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// Pair case. 102116680a4aac90f2aa7413d9095a592090648e557Ben Murdochtemplate <typename S, typename T> 103010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)inline istream &ReadType(istream &strm, pair<S, T> *p) { 104010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles) ReadType(strm, &p->first); 1056d86b77056ed63eb6871182f42a9fd5f07550f90Torne (Richard Coles) ReadType(strm, &p->second); 106116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return strm; 10758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)} 10823730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) 1091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccitemplate <typename S, typename T> 1101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucciinline istream &ReadType(istream &strm, pair<const S, T> *p) { 1111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ReadType(strm, const_cast<S *>(&p->first)); 1121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci ReadType(strm, &p->second); 11323730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) return strm; 11423730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles)} 11523730a6e56a168d1879203e4b3819bb36e3d8f1fTorne (Richard Coles) 116116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch// General case - no-op. 117116680a4aac90f2aa7413d9095a592090648e557Ben Murdochtemplate <typename C> 118a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)void StlReserve(C *c, int64 n) {} 119a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 12058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Specialization for vectors. 12158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)template <typename S, typename T> 12258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)void StlReserve(vector<S, T> *c, int64 n) { 123 c->reserve(n); 124} 125 126// STL sequence container. 127#define READ_STL_SEQ_TYPE(C) \ 128template <typename S, typename T> \ 129inline istream &ReadType(istream &strm, C<S, T> *c) { \ 130 c->clear(); \ 131 int64 n = 0; \ 132 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 133 StlReserve(c, n); \ 134 for (ssize_t i = 0; i < n; ++i) { \ 135 typename C<S, T>::value_type value; \ 136 ReadType(strm, &value); \ 137 c->insert(c->end(), value); \ 138 } \ 139 return strm; \ 140} 141 142READ_STL_SEQ_TYPE(vector); 143READ_STL_SEQ_TYPE(list); 144 145// STL associative container. 146#define READ_STL_ASSOC_TYPE(C) \ 147template <typename S, typename T, typename U> \ 148inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ 149 c->clear(); \ 150 int64 n = 0; \ 151 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 152 for (ssize_t i = 0; i < n; ++i) { \ 153 typename C<S, T, U>::value_type value; \ 154 ReadType(strm, &value); \ 155 c->insert(value); \ 156 } \ 157 return strm; \ 158} 159 160READ_STL_ASSOC_TYPE(set); 161READ_STL_ASSOC_TYPE(unordered_set); 162READ_STL_ASSOC_TYPE(map); 163READ_STL_ASSOC_TYPE(unordered_map); 164 165// Write some types to an output stream. 166 167// Generic case. 168template <typename T> 169inline ostream &WriteType(ostream &strm, const T t) { 170 t.Write(strm); 171 return strm; 172} 173 174// Fixed size, contiguous memory write. 175#define WRITE_POD_TYPE(T) \ 176inline ostream &WriteType(ostream &strm, const T t) { \ 177 return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ 178} 179 180WRITE_POD_TYPE(bool); 181WRITE_POD_TYPE(char); 182WRITE_POD_TYPE(signed char); 183WRITE_POD_TYPE(unsigned char); 184WRITE_POD_TYPE(short); 185WRITE_POD_TYPE(unsigned short); 186WRITE_POD_TYPE(int); 187WRITE_POD_TYPE(unsigned int); 188WRITE_POD_TYPE(long); 189WRITE_POD_TYPE(unsigned long); 190WRITE_POD_TYPE(long long); 191WRITE_POD_TYPE(unsigned long long); 192WRITE_POD_TYPE(float); 193WRITE_POD_TYPE(double); 194 195// String case. 196inline ostream &WriteType(ostream &strm, const string &s) { 197 int32 ns = s.size(); 198 strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); 199 return strm.write(s.data(), ns); 200} 201 202// Pair case. 203template <typename S, typename T> 204inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { 205 WriteType(strm, p.first); 206 WriteType(strm, p.second); 207 return strm; 208} 209 210// STL sequence container. 211#define WRITE_STL_SEQ_TYPE(C) \ 212template <typename S, typename T> \ 213inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ 214 int64 n = c.size(); \ 215 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 216 for (typename C<S, T>::const_iterator it = c.begin(); \ 217 it != c.end(); ++it) \ 218 WriteType(strm, *it); \ 219 return strm; \ 220} 221 222WRITE_STL_SEQ_TYPE(vector); 223WRITE_STL_SEQ_TYPE(list); 224 225// STL associative container. 226#define WRITE_STL_ASSOC_TYPE(C) \ 227template <typename S, typename T, typename U> \ 228inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ 229 int64 n = c.size(); \ 230 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 231 for (typename C<S, T, U>::const_iterator it = c.begin(); \ 232 it != c.end(); ++it) \ 233 WriteType(strm, *it); \ 234 return strm; \ 235} 236 237WRITE_STL_ASSOC_TYPE(set); 238WRITE_STL_ASSOC_TYPE(unordered_set); 239WRITE_STL_ASSOC_TYPE(map); 240WRITE_STL_ASSOC_TYPE(unordered_map); 241 242// Utilities for converting between int64 or Weight and string. 243 244int64 StrToInt64(const string &s, const string &src, size_t nline, 245 bool allow_negative, bool *error = 0); 246 247template <typename Weight> 248Weight StrToWeight(const string &s, const string &src, size_t nline) { 249 Weight w; 250 istringstream strm(s); 251 strm >> w; 252 if (!strm) { 253 FSTERROR() << "StrToWeight: Bad weight = \"" << s 254 << "\", source = " << src << ", line = " << nline; 255 return Weight::NoWeight(); 256 } 257 return w; 258} 259 260void Int64ToStr(int64 n, string *s); 261 262template <typename Weight> 263void WeightToStr(Weight w, string *s) { 264 ostringstream strm; 265 strm.precision(9); 266 strm << w; 267 *s += strm.str(); 268} 269 270// Utilities for reading/writing label pairs 271 272// Returns true on success 273template <typename Label> 274bool ReadLabelPairs(const string& filename, 275 vector<pair<Label, Label> >* pairs, 276 bool allow_negative = false) { 277 ifstream strm(filename.c_str()); 278 279 if (!strm) { 280 LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; 281 return false; 282 } 283 284 const int kLineLen = 8096; 285 char line[kLineLen]; 286 size_t nline = 0; 287 288 pairs->clear(); 289 while (strm.getline(line, kLineLen)) { 290 ++nline; 291 vector<char *> col; 292 SplitToVector(line, "\n\t ", &col, true); 293 if (col.size() == 0 || col[0][0] == '\0') // empty line 294 continue; 295 if (col.size() != 2) { 296 LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " 297 << "file = " << filename << ", line = " << nline; 298 return false; 299 } 300 301 bool err; 302 Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); 303 if (err) return false; 304 Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); 305 if (err) return false; 306 pairs->push_back(make_pair(frmlabel, tolabel)); 307 } 308 return true; 309} 310 311// Returns true on success 312template <typename Label> 313bool WriteLabelPairs(const string& filename, 314 const vector<pair<Label, Label> >& pairs) { 315 ostream *strm = &std::cout; 316 if (!filename.empty()) { 317 strm = new ofstream(filename.c_str()); 318 if (!*strm) { 319 LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; 320 return false; 321 } 322 } 323 324 for (ssize_t n = 0; n < pairs.size(); ++n) 325 *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; 326 327 if (!*strm) { 328 LOG(ERROR) << "WriteLabelPairs: Write failed: " 329 << (filename.empty() ? "standard output" : filename); 330 return false; 331 } 332 if (strm != &std::cout) 333 delete strm; 334 return true; 335} 336 337// Utilities for converting a type name to a legal C symbol. 338 339void ConvertToLegalCSymbol(string *s); 340 341 342// 343// UTILITIES FOR STREAM I/O 344// 345 346bool AlignInput(istream &strm, int align); 347bool AlignOutput(ostream &strm, int align); 348 349// 350// UTILITIES FOR PROTOCOL BUFFER I/O 351// 352 353 354// An associative container for which testing membership is 355// faster than an STL set if members are restricted to an interval 356// that excludes most non-members. A 'Key' must have ==, !=, and < defined. 357// Element 'NoKey' should be a key that marks an uninitialized key and 358// is otherwise unused. 'Find()' returns an STL const_iterator to the match 359// found, otherwise it equals 'End()'. 360template <class Key, Key NoKey> 361class CompactSet { 362public: 363 typedef typename set<Key>::const_iterator const_iterator; 364 365 CompactSet() 366 : min_key_(NoKey), 367 max_key_(NoKey) { } 368 369 CompactSet(const CompactSet<Key, NoKey> &compact_set) 370 : set_(compact_set.set_), 371 min_key_(compact_set.min_key_), 372 max_key_(compact_set.max_key_) { } 373 374 void Insert(Key key) { 375 set_.insert(key); 376 if (min_key_ == NoKey || key < min_key_) 377 min_key_ = key; 378 if (max_key_ == NoKey || max_key_ < key) 379 max_key_ = key; 380 } 381 382 void Clear() { 383 set_.clear(); 384 min_key_ = max_key_ = NoKey; 385 } 386 387 const_iterator Find(Key key) const { 388 if (min_key_ == NoKey || 389 key < min_key_ || max_key_ < key) 390 return set_.end(); 391 else 392 return set_.find(key); 393 } 394 395 const_iterator Begin() const { return set_.begin(); } 396 397 const_iterator End() const { return set_.end(); } 398 399private: 400 set<Key> set_; 401 Key min_key_; 402 Key max_key_; 403 404 void operator=(const CompactSet<Key, NoKey> &); //disallow 405}; 406 407} // namespace fst 408 409#endif // FST_LIB_UTIL_H__ 410