// util.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// FST utility definitions.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 21f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <cctype> 22f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <string> 23f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson#include <fst/util.h> 245b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin#include <fst/mapped-file.h> 25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Utility flag definitions 27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(fst_error_fatal, true, 29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson "FST errors are fatal; o.w. return objects flagged as bad: " 30dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin " e.g., FSTs - kError prop. true, FST weights - not a Member()"); 31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst { 33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint64 StrToInt64(const string &s, const string &src, size_t nline, 355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin bool allow_negative, bool *error) { 36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 n; 37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson const char *cs = s.c_str(); 38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson char *p; 39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error) *error = false; 40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson n = strtoll(cs, &p, 10); 41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (p < cs + s.size() || (!allow_negative && n < 0)) { 42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson FSTERROR() << "StrToInt64: Bad integer = " << s 43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson << "\", source = " << src << ", line = " << nline; 44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (error) 
*error = true; 45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return 0; 46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return n; 48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 50f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid Int64ToStr(int64 n, string *s) { 51f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson ostringstream nstr; 52f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson nstr << n; 53dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin s->append(nstr.str().data(), nstr.str().size()); 54f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 56f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonvoid ConvertToLegalCSymbol(string *s) { 57f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson for (string::iterator it = s->begin(); it != s->end(); ++it) 58f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (!isalnum(*it)) *it = '_'; 59f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Skips over input characters to align to 'align' bytes. Returns 62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// false if can't align. 
635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool AlignInput(istream &strm) { 64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson char c; 655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin for (int i = 0; i < MappedFile::kArchAlignment; ++i) { 66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 pos = strm.tellg(); 67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (pos < 0) { 68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "AlignInput: can't determine stream position"; 69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (pos % MappedFile::kArchAlignment == 0) break; 72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm.read(&c, 1); 73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Write null output characters to align to 'align' bytes. Returns 78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// false if can't align. 
795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool AlignOutput(ostream &strm) { 805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin for (int i = 0; i < MappedFile::kArchAlignment; ++i) { 81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson int64 pos = strm.tellp(); 82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson if (pos < 0) { 83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson LOG(ERROR) << "AlignOutput: can't determine stream position"; 84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return false; 85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin if (pos % MappedFile::kArchAlignment == 0) break; 87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson strm.write("", 1); 88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson } 89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson return true; 90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} 91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson 93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson} // namespace fst 94