// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
1480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 1580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// Author: Philippe Liard 1680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 1780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#ifndef I18N_PHONENUMBERS_UNICODESTRING_H_ 1880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#define I18N_PHONENUMBERS_UNICODESTRING_H_ 1980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 2080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#include "phonenumbers/utf/unicodetext.h" 2180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 2280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#include <cstring> 2380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#include <limits> 2480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 2580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comnamespace i18n { 2680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comnamespace phonenumbers { 2780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 2880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// This class supports the minimal subset of icu::UnicodeString needed by 2980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// AsYouTypeFormatter in order to let the libphonenumber not depend on ICU 3080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// which is not available by default on some systems, such as iOS. 
3180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comclass UnicodeString { 3280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com public: 3380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com UnicodeString() : cached_index_(-1) {} 3480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 3580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Constructs a new unicode string copying the provided C string. 3680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com explicit UnicodeString(const char* utf8) 3780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com : text_(UTF8ToUnicodeText(utf8, std::strlen(utf8))), 3880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com cached_index_(-1) {} 3980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 4080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Constructs a new unicode string containing the provided codepoint. 4180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com explicit UnicodeString(char32 codepoint) : cached_index_(-1) { 4280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com append(codepoint); 4380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 4480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 4580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com UnicodeString(const UnicodeString& src) 4680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com : text_(src.text_), cached_index_(-1) {} 4780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 4880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com UnicodeString& operator=(const UnicodeString& src); 4980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 5080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com bool operator==(const UnicodeString& rhs) const; 5180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 
5280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com void append(const UnicodeString& unicode_string); 5380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 5480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline void append(char32 codepoint) { 5580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com invalidateCachedIndex(); 5680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com text_.push_back(codepoint); 5780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 5880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 5980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com typedef UnicodeText::const_iterator const_iterator; 6080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 6180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline const_iterator begin() const { 6280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com return text_.begin(); 6380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 6480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 6580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline const_iterator end() const { 6680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com return text_.end(); 6780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 6880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 6980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Returns the index of the provided codepoint or -1 if not found. 7080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com int indexOf(char32 codepoint) const; 7180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 7280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Returns the number of codepoints contained in the unicode string. 
7380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline int length() const { 7480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com return text_.size(); 7580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 7680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 7780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Clears the unicode string. 7880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline void remove() { 7980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com invalidateCachedIndex(); 8080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com text_.clear(); 8180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 8280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 8380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Replaces the substring located at [ start, start + length - 1 ] with the 8480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // provided unicode string. 8580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com void replace(int start, int length, const UnicodeString& src); 8680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 8780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com void setCharAt(int pos, char32 c); 8880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 8980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Copies the provided C string. 
9080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline void setTo(const char* s, size_t len) { 9180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com invalidateCachedIndex(); 9280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com text_.CopyUTF8(s, len); 9380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 9480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 9580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Returns the substring located at [ start, start + length - 1 ] without 9680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // copying the underlying C string. If one of the provided parameters is out 9780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // of range, the function returns an empty unicode string. 9880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com UnicodeString tempSubString( 9980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com int start, 10080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com int length = std::numeric_limits<int>::max()) const; 10180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 10280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline void toUTF8String(string& out) const { 10380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com out = UnicodeTextToUTF8(text_); 10480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 10580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 10680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com char32 operator[](int index) const; 10780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 10880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com private: 10980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com UnicodeText text_; 11080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 11180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // As 
UnicodeText doesn't provide random access, an operator[] implementation 11280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // would naively iterate from the beginning of the string to the supplied 11380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // index which would be inefficient. 11480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // As operator[] is very likely to be called in a loop with consecutive 11580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // indexes, we save the corresponding iterator so we can reuse it the next 11680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // time it is called. 11780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 11880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // The following function which invalidates the cached index corresponding to 11980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // the iterator position must be called every time the unicode string is 12080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // modified (i.e. in all the non-const methods). 12180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com inline void invalidateCachedIndex() { 12280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com cached_index_ = -1; 12380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com } 12480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 12580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com // Iterator corresponding to the cached index below, used by operator[]. 
12680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com mutable UnicodeText::const_iterator cached_it_; 12780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com mutable int cached_index_; 12880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com}; 12980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 13080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com} // namespace phonenumbers 13180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com} // namespace i18n 13280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com 13380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com#endif // I18N_PHONENUMBERS_UNICODESTRING_H_ 134