// icu.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: roubert@google.com (Fredrik Roubert)

// Wrapper class for UErrorCode, with conversion operators for direct use in
// ICU C and C++ APIs.
//
// Features:
// - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
//   removing one common source of errors.
// - Same use in C APIs taking a UErrorCode* (pointer) and C++ taking
//   UErrorCode& (reference), via conversion operators.
// - Automatic checking for success when it goes out of scope. On failure,
//   the destructor will FSTERROR() an error message.
//
// Most of ICU will handle errors gracefully and provide sensible fallbacks.
// Using IcuErrorCode, it is therefore possible to write very compact code
// that does sensible things on failure and provides logging for debugging.
32// 33// Example: 34// 35// IcuErrorCode icuerrorcode; 36// return collator.compareUTF8(a, b, icuerrorcode) == UCOL_EQUAL; 37 38#ifndef FST_LIB_ICU_H_ 39#define FST_LIB_ICU_H_ 40 41#include <unicode/errorcode.h> 42#include <unicode/unistr.h> 43#include <unicode/ustring.h> 44#include <unicode/utf8.h> 45 46class IcuErrorCode : public icu::ErrorCode { 47 public: 48 IcuErrorCode() {} 49 virtual ~IcuErrorCode() { if (isFailure()) handleFailure(); } 50 51 // Redefine 'errorName()' in order to be compatible with ICU version 4.2 52 const char* errorName() const { 53 return u_errorName(errorCode); 54 } 55 56 protected: 57 virtual void handleFailure() const { 58 FSTERROR() << errorName(); 59} 60 61 private: 62 DISALLOW_COPY_AND_ASSIGN(IcuErrorCode); 63}; 64 65namespace fst { 66 67template <class Label> 68bool UTF8StringToLabels(const string &str, vector<Label> *labels) { 69 const char *c_str = str.c_str(); 70 int32_t length = str.size(); 71 UChar32 c; 72 for (int32_t i = 0; i < length; /* no update */) { 73 U8_NEXT(c_str, i, length, c); 74 if (c < 0) { 75 LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c; 76 return false; 77 } 78 labels->push_back(c); 79 } 80 return true; 81} 82 83template <class Label> 84bool LabelsToUTF8String(const vector<Label> &labels, string *str) { 85 icu::UnicodeString u_str; 86 char c_str[5]; 87 for (size_t i = 0; i < labels.size(); ++i) { 88 u_str.setTo(labels[i]); 89 IcuErrorCode error; 90 u_strToUTF8(c_str, 5, NULL, u_str.getTerminatedBuffer(), -1, error); 91 if (error.isFailure()) { 92 LOG(ERROR) << "LabelsToUTF8String: Bad encoding: " 93 << error.errorName(); 94 return false; 95 } 96 *str += c_str; 97 } 98 return true; 99} 100 101} // namespace fst 102 103#endif // FST_LIB_ICU_H_ 104