icu.h revision f4c12fce1ee58e670f9c3fce46c40296ba9ee8a2
1// icu.h
2
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Copyright 2005-2010 Google, Inc.
16// Author: roubert@google.com (Fredrik Roubert)
17
18// Wrapper class for UErrorCode, with conversion operators for direct use in
19// ICU C and C++ APIs.
20//
21// Features:
22// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
23//   removing one common source of errors.
24// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
25//   UErrorCode& (reference), via conversion operators.
// - Automatic checking for success when it goes out of scope. On failure,
//   the destructor will log an error message via FSTERROR().
28//
29// Most of ICU will handle errors gracefully and provide sensible fallbacks.
30// Using IcuErrorCode, it is therefore possible to write very compact code
31// that does sensible things on failure and provides logging for debugging.
32//
33// Example:
34//
35// IcuErrorCode icuerrorcode;
36// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL;
37
38#ifndef FST_LIB_ICU_H_
39#define FST_LIB_ICU_H_
40
41#include <unicode/errorcode.h>
42#include <unicode/unistr.h>
43#include <unicode/ustring.h>
44#include <unicode/utf8.h>
45
46class IcuErrorCode : public icu::ErrorCode {
47 public:
48  IcuErrorCode() {}
49  virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); }
50
51  // Redefine 'errorName()' in order to be compatible with ICU version 4.2
52  const char* errorName() const {
53    return u_errorName(errorCode);
54  }
55
56 protected:
57  virtual void handleFailure() const {
58    FSTERROR() << errorName();
59}
60
61 private:
62  DISALLOW_COPY_AND_ASSIGN(IcuErrorCode);
63};
64
65namespace fst {
66
67template <class Label>
68bool UTF8StringToLabels(const string &str, vector<Label> *labels) {
69  const char *c_str = str.c_str();
70  int32_t length = str.size();
71  UChar32 c;
72  for (int32_t i = 0; i < length; /* no update */) {
73    U8_NEXT(c_str, i, length, c);
74    if (c < 0) {
75      LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c;
76      return false;
77    }
78    labels->push_back(c);
79  }
80  return true;
81}
82
83template <class Label>
84bool LabelsToUTF8String(const vector<Label> &labels, string *str) {
85  icu::UnicodeString u_str;
86  char c_str[5];
87  for (size_t i = 0; i < labels.size(); ++i) {
88    u_str.setTo(labels[i]);
89    IcuErrorCode error;
90    u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error);
91    if (error.isFailure()) {
92      LOG(ERROR) << "LabelsToUTF8String: Bad encoding: "
93                 << error.errorName();
94      return false;
95    }
96    *str += c_str;
97  }
98  return true;
99}
100
101}  // namespace fst
102
103#endif  // FST_LIB_ICU_H_
104