// util.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// FST utility definitions.
20f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
#include <cctype>
#include <cerrno>
#include <string>
#include <fst/util.h>
#include <fst/mapped-file.h>
25f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
26f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Utility flag definitions
27f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
28f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian HodsonDEFINE_bool(fst_error_fatal, true,
29f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson            "FST errors are fatal; o.w. return objects flagged as bad: "
30dfd8b8327b93660601d016cdc6f29f433b45a8d8Alexander Gutkin            " e.g., FSTs - kError prop. true, FST weights - not  a Member()");
31f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
32f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonnamespace fst {
33f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
34f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodsonint64 StrToInt64(const string &s, const string &src, size_t nline,
355b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin                 bool allow_negative, bool *error) {
36f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  int64 n;
37f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  const char *cs = s.c_str();
38f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  char *p;
39f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (error) *error = false;
40f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  n = strtoll(cs, &p, 10);
41f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  if (p < cs + s.size() || (!allow_negative && n < 0)) {
42f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    FSTERROR() << "StrToInt64: Bad integer = " << s
43f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson               << "\", source = " << src << ", line = " << nline;
44f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (error) *error = true;
45f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    return 0;
46f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
47f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return n;
48f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
49f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Appends the decimal text of 'n' to '*s'. Whatever '*s' already holds is
// kept; 's' must not be null.
void Int64ToStr(int64 n, string *s) {
  ostringstream nstr;
  nstr << n;
  // str() returns a fresh copy of the buffer on every call, so take it once
  // and append that single copy.
  const string digits = nstr.str();
  s->append(digits);
}
55f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
// Rewrites '*s' in place: every character for which isalnum() is false is
// replaced by '_'. Alphanumeric characters, including a leading digit, are
// left unchanged. 's' must not be null.
void ConvertToLegalCSymbol(string *s) {
  for (string::iterator it = s->begin(); it != s->end(); ++it) {
    // isalnum() is only defined for values representable as unsigned char
    // (or EOF); a negative plain char (e.g. a byte >= 0x80) must be converted
    // first to avoid undefined behavior.
    if (!isalnum(static_cast<unsigned char>(*it))) *it = '_';
  }
}
60f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
61f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Skips over input characters to align to 'align' bytes. Returns
62f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// false if can't align.
635b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool AlignInput(istream &strm) {
64f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  char c;
655b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  for (int i = 0; i < MappedFile::kArchAlignment; ++i) {
66f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    int64 pos = strm.tellg();
67f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (pos < 0) {
68f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "AlignInput: can't determine stream position";
69f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
70f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
715b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (pos % MappedFile::kArchAlignment == 0) break;
72f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm.read(&c, 1);
73f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
74f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
75f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
76f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
77f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// Write null output characters to align to 'align' bytes. Returns
78f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson// false if can't align.
795b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkinbool AlignOutput(ostream &strm) {
805b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin  for (int i = 0; i < MappedFile::kArchAlignment; ++i) {
81f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    int64 pos = strm.tellp();
82f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    if (pos < 0) {
83f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      LOG(ERROR) << "AlignOutput: can't determine stream position";
84f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson      return false;
85f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    }
865b6dc79427b8f7eeb6a7ff68034ab8548ce670eaAlexander Gutkin    if (pos % MappedFile::kArchAlignment == 0) break;
87f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson    strm.write("", 1);
88f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  }
89f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson  return true;
90f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}
91f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
92f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson
93f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2Ian Hodson}  // namespace fst
94