util.h revision dfd8b8327b93660601d016cdc6f29f433b45a8d8
1// util.h 2 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14// 15// Copyright 2005-2010 Google, Inc. 16// Author: riley@google.com (Michael Riley) 17// 18// \file 19// FST utility inline definitions. 20 21#ifndef FST_LIB_UTIL_H__ 22#define FST_LIB_UTIL_H__ 23 24#include <unordered_map> 25using std::tr1::unordered_map; 26using std::tr1::unordered_multimap; 27#include <unordered_set> 28using std::tr1::unordered_set; 29using std::tr1::unordered_multiset; 30#include <list> 31#include <map> 32#include <set> 33#include <sstream> 34#include <string> 35#include <vector> 36using std::vector; 37 38 39#include <fst/compat.h> 40#include <fst/types.h> 41 42#include <iostream> 43#include <fstream> 44#include <sstream> 45 46// 47// UTILITY FOR ERROR HANDLING 48// 49 50DECLARE_bool(fst_error_fatal); 51 52#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) 53 54namespace fst { 55 56// 57// UTILITIES FOR TYPE I/O 58// 59 60// Read some types from an input stream. 61 62// Generic case. 63template <typename T> 64inline istream &ReadType(istream &strm, T *t) { 65 return t->Read(strm); 66} 67 68// Fixed size, contiguous memory read. 69#define READ_POD_TYPE(T) \ 70inline istream &ReadType(istream &strm, T *t) { \ 71 return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ 72} 73 74READ_POD_TYPE(bool); 75READ_POD_TYPE(char); 76READ_POD_TYPE(signed char); 77READ_POD_TYPE(unsigned char); 78READ_POD_TYPE(short); 79READ_POD_TYPE(unsigned short); 80READ_POD_TYPE(int); 81READ_POD_TYPE(unsigned int); 82READ_POD_TYPE(long); 83READ_POD_TYPE(unsigned long); 84READ_POD_TYPE(long long); 85READ_POD_TYPE(unsigned long long); 86READ_POD_TYPE(float); 87READ_POD_TYPE(double); 88 89// String case. 90inline istream &ReadType(istream &strm, string *s) { 91 s->clear(); 92 int32 ns = 0; 93 strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); 94 for (int i = 0; i < ns; ++i) { 95 char c; 96 strm.read(&c, 1); 97 *s += c; 98 } 99 return strm; 100} 101 102// Pair case. 103template <typename S, typename T> 104inline istream &ReadType(istream &strm, pair<S, T> *p) { 105 ReadType(strm, &p->first); 106 ReadType(strm, &p->second); 107 return strm; 108} 109 110template <typename S, typename T> 111inline istream &ReadType(istream &strm, pair<const S, T> *p) { 112 ReadType(strm, const_cast<S *>(&p->first)); 113 ReadType(strm, &p->second); 114 return strm; 115} 116 117// General case - no-op. 118template <typename C> 119void StlReserve(C *c, int64 n) {} 120 121// Specialization for vectors. 122template <typename S, typename T> 123void StlReserve(vector<S, T> *c, int64 n) { 124 c->reserve(n); 125} 126 127// STL sequence container. 128#define READ_STL_SEQ_TYPE(C) \ 129template <typename S, typename T> \ 130inline istream &ReadType(istream &strm, C<S, T> *c) { \ 131 c->clear(); \ 132 int64 n = 0; \ 133 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 134 StlReserve(c, n); \ 135 for (ssize_t i = 0; i < n; ++i) { \ 136 typename C<S, T>::value_type value; \ 137 ReadType(strm, &value); \ 138 c->insert(c->end(), value); \ 139 } \ 140 return strm; \ 141} 142 143READ_STL_SEQ_TYPE(vector); 144READ_STL_SEQ_TYPE(list); 145 146// STL associative container. 147#define READ_STL_ASSOC_TYPE(C) \ 148template <typename S, typename T, typename U> \ 149inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ 150 c->clear(); \ 151 int64 n = 0; \ 152 strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ 153 for (ssize_t i = 0; i < n; ++i) { \ 154 typename C<S, T, U>::value_type value; \ 155 ReadType(strm, &value); \ 156 c->insert(value); \ 157 } \ 158 return strm; \ 159} 160 161READ_STL_ASSOC_TYPE(set); 162READ_STL_ASSOC_TYPE(unordered_set); 163READ_STL_ASSOC_TYPE(map); 164READ_STL_ASSOC_TYPE(unordered_map); 165 166// Write some types to an output stream. 167 168// Generic case. 169template <typename T> 170inline ostream &WriteType(ostream &strm, const T t) { 171 t.Write(strm); 172 return strm; 173} 174 175// Fixed size, contiguous memory write. 176#define WRITE_POD_TYPE(T) \ 177inline ostream &WriteType(ostream &strm, const T t) { \ 178 return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ 179} 180 181WRITE_POD_TYPE(bool); 182WRITE_POD_TYPE(char); 183WRITE_POD_TYPE(signed char); 184WRITE_POD_TYPE(unsigned char); 185WRITE_POD_TYPE(short); 186WRITE_POD_TYPE(unsigned short); 187WRITE_POD_TYPE(int); 188WRITE_POD_TYPE(unsigned int); 189WRITE_POD_TYPE(long); 190WRITE_POD_TYPE(unsigned long); 191WRITE_POD_TYPE(long long); 192WRITE_POD_TYPE(unsigned long long); 193WRITE_POD_TYPE(float); 194WRITE_POD_TYPE(double); 195 196// String case. 197inline ostream &WriteType(ostream &strm, const string &s) { 198 int32 ns = s.size(); 199 strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); 200 return strm.write(s.data(), ns); 201} 202 203// Pair case. 204template <typename S, typename T> 205inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { 206 WriteType(strm, p.first); 207 WriteType(strm, p.second); 208 return strm; 209} 210 211// STL sequence container. 212#define WRITE_STL_SEQ_TYPE(C) \ 213template <typename S, typename T> \ 214inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ 215 int64 n = c.size(); \ 216 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 217 for (typename C<S, T>::const_iterator it = c.begin(); \ 218 it != c.end(); ++it) \ 219 WriteType(strm, *it); \ 220 return strm; \ 221} 222 223WRITE_STL_SEQ_TYPE(vector); 224WRITE_STL_SEQ_TYPE(list); 225 226// STL associative container. 227#define WRITE_STL_ASSOC_TYPE(C) \ 228template <typename S, typename T, typename U> \ 229inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ 230 int64 n = c.size(); \ 231 strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ 232 for (typename C<S, T, U>::const_iterator it = c.begin(); \ 233 it != c.end(); ++it) \ 234 WriteType(strm, *it); \ 235 return strm; \ 236} 237 238WRITE_STL_ASSOC_TYPE(set); 239WRITE_STL_ASSOC_TYPE(unordered_set); 240WRITE_STL_ASSOC_TYPE(map); 241WRITE_STL_ASSOC_TYPE(unordered_map); 242 243// Utilities for converting between int64 or Weight and string. 244 245int64 StrToInt64(const string &s, const string &src, size_t nline, 246 bool allow_negative, bool *error = 0); 247 248template <typename Weight> 249Weight StrToWeight(const string &s, const string &src, size_t nline) { 250 Weight w; 251 istringstream strm(s); 252 strm >> w; 253 if (!strm) { 254 FSTERROR() << "StrToWeight: Bad weight = \"" << s 255 << "\", source = " << src << ", line = " << nline; 256 return Weight::NoWeight(); 257 } 258 return w; 259} 260 261void Int64ToStr(int64 n, string *s); 262 263template <typename Weight> 264void WeightToStr(Weight w, string *s) { 265 ostringstream strm; 266 strm.precision(9); 267 strm << w; 268 s->append(strm.str().data(), strm.str().size()); 269} 270 271// Utilities for reading/writing label pairs 272 273// Returns true on success 274template <typename Label> 275bool ReadLabelPairs(const string& filename, 276 vector<pair<Label, Label> >* pairs, 277 bool allow_negative = false) { 278 ifstream strm(filename.c_str()); 279 280 if (!strm) { 281 LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; 282 return false; 283 } 284 285 const int kLineLen = 8096; 286 char line[kLineLen]; 287 size_t nline = 0; 288 289 pairs->clear(); 290 while (strm.getline(line, kLineLen)) { 291 ++nline; 292 vector<char *> col; 293 SplitToVector(line, "\n\t ", &col, true); 294 if (col.size() == 0 || col[0][0] == '\0') // empty line 295 continue; 296 if (col.size() != 2) { 297 LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " 298 << "file = " << filename << ", line = " << nline; 299 return false; 300 } 301 302 bool err; 303 Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); 304 if (err) return false; 305 Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); 306 if (err) return false; 307 pairs->push_back(make_pair(frmlabel, tolabel)); 308 } 309 return true; 310} 311 312// Returns true on success 313template <typename Label> 314bool WriteLabelPairs(const string& filename, 315 const vector<pair<Label, Label> >& pairs) { 316 ostream *strm = &cout; 317 if (!filename.empty()) { 318 strm = new ofstream(filename.c_str()); 319 if (!*strm) { 320 LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; 321 return false; 322 } 323 } 324 325 for (ssize_t n = 0; n < pairs.size(); ++n) 326 *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; 327 328 if (!*strm) { 329 LOG(ERROR) << "WriteLabelPairs: Write failed: " 330 << (filename.empty() ? "standard output" : filename); 331 return false; 332 } 333 if (strm != &cout) 334 delete strm; 335 return true; 336} 337 338// Utilities for converting a type name to a legal C symbol. 339 340void ConvertToLegalCSymbol(string *s); 341 342 343// 344// UTILITIES FOR STREAM I/O 345// 346 347bool AlignInput(istream &strm, int align); 348bool AlignOutput(ostream &strm, int align); 349 350// 351// UTILITIES FOR PROTOCOL BUFFER I/O 352// 353 354 355// An associative container for which testing membership is 356// faster than an STL set if members are restricted to an interval 357// that excludes most non-members. A 'Key' must have ==, !=, and < defined. 358// Element 'NoKey' should be a key that marks an uninitialized key and 359// is otherwise unused. 'Find()' returns an STL const_iterator to the match 360// found, otherwise it equals 'End()'. 361template <class Key, Key NoKey> 362class CompactSet { 363public: 364 typedef typename set<Key>::const_iterator const_iterator; 365 366 CompactSet() 367 : min_key_(NoKey), 368 max_key_(NoKey) { } 369 370 CompactSet(const CompactSet<Key, NoKey> &compact_set) 371 : set_(compact_set.set_), 372 min_key_(compact_set.min_key_), 373 max_key_(compact_set.max_key_) { } 374 375 void Insert(Key key) { 376 set_.insert(key); 377 if (min_key_ == NoKey || key < min_key_) 378 min_key_ = key; 379 if (max_key_ == NoKey || max_key_ < key) 380 max_key_ = key; 381 } 382 383 void Clear() { 384 set_.clear(); 385 min_key_ = max_key_ = NoKey; 386 } 387 388 const_iterator Find(Key key) const { 389 if (min_key_ == NoKey || 390 key < min_key_ || max_key_ < key) 391 return set_.end(); 392 else 393 return set_.find(key); 394 } 395 396 const_iterator Begin() const { return set_.begin(); } 397 398 const_iterator End() const { return set_.end(); } 399 400private: 401 set<Key> set_; 402 Key min_key_; 403 Key max_key_; 404 405 void operator=(const CompactSet<Key, NoKey> &); //disallow 406}; 407 408} // namespace fst 409 410#endif // FST_LIB_UTIL_H__ 411