// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Author: Philippe Liard

#ifndef I18N_PHONENUMBERS_UNICODESTRING_H_
#define I18N_PHONENUMBERS_UNICODESTRING_H_

#include "phonenumbers/utf/unicodetext.h"

#include <cstring>
#include <limits>

2580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comnamespace i18n {
2680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comnamespace phonenumbers {
2780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
2880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// This class supports the minimal subset of icu::UnicodeString needed by
2980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// AsYouTypeFormatter in order to let the libphonenumber not depend on ICU
3080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com// which is not available by default on some systems, such as iOS.
3180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.comclass UnicodeString {
3280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com public:
3380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  UnicodeString() : cached_index_(-1) {}
3480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
3580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Constructs a new unicode string copying the provided C string.
3680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  explicit UnicodeString(const char* utf8)
3780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com      : text_(UTF8ToUnicodeText(utf8, std::strlen(utf8))),
3880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com        cached_index_(-1) {}
3980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
4080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Constructs a new unicode string containing the provided codepoint.
4180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  explicit UnicodeString(char32 codepoint) : cached_index_(-1) {
4280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    append(codepoint);
4380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
4480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
4580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  UnicodeString(const UnicodeString& src)
4680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com      : text_(src.text_), cached_index_(-1) {}
4780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
4880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  UnicodeString& operator=(const UnicodeString& src);
4980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
5080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  bool operator==(const UnicodeString& rhs) const;
5180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
5280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  void append(const UnicodeString& unicode_string);
5380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
5480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline void append(char32 codepoint) {
5580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    invalidateCachedIndex();
5680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    text_.push_back(codepoint);
5780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
5880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
5980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  typedef UnicodeText::const_iterator const_iterator;
6080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
6180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline const_iterator begin() const {
6280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    return text_.begin();
6380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
6480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
6580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline const_iterator end() const {
6680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    return text_.end();
6780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
6880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
6980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Returns the index of the provided codepoint or -1 if not found.
7080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  int indexOf(char32 codepoint) const;
7180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
7280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Returns the number of codepoints contained in the unicode string.
7380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline int length() const {
7480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    return text_.size();
7580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
7680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
7780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Clears the unicode string.
7880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline void remove() {
7980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    invalidateCachedIndex();
8080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    text_.clear();
8180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
8280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
8380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Replaces the substring located at [ start, start + length - 1 ] with the
8480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // provided unicode string.
8580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  void replace(int start, int length, const UnicodeString& src);
8680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
8780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  void setCharAt(int pos, char32 c);
8880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
8980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Copies the provided C string.
9080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline void setTo(const char* s, size_t len) {
9180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    invalidateCachedIndex();
9280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    text_.CopyUTF8(s, len);
9380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
9480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
9580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Returns the substring located at [ start, start + length - 1 ] without
9680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // copying the underlying C string. If one of the provided parameters is out
9780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // of range, the function returns an empty unicode string.
9880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  UnicodeString tempSubString(
9980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com      int start,
10080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com      int length = std::numeric_limits<int>::max()) const;
10180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
10280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline void toUTF8String(string& out) const {
10380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    out = UnicodeTextToUTF8(text_);
10480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
10580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
10680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  char32 operator[](int index) const;
10780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
10880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com private:
10980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  UnicodeText text_;
11080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
11180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // As UnicodeText doesn't provide random access, an operator[] implementation
11280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // would naively iterate from the beginning of the string to the supplied
11380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // index which would be inefficient.
11480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // As operator[] is very likely to be called in a loop with consecutive
11580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // indexes, we save the corresponding iterator so we can reuse it the next
11680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // time it is called.
11780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
11880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // The following function which invalidates the cached index corresponding to
11980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // the iterator position must be called every time the unicode string is
12080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // modified (i.e. in all the non-const methods).
12180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  inline void invalidateCachedIndex() {
12280d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com    cached_index_ = -1;
12380d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  }
12480d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
12580d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  // Iterator corresponding to the cached index below, used by operator[].
12680d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  mutable UnicodeText::const_iterator cached_it_;
12780d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com  mutable int cached_index_;
12880d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com};
12980d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com
13080d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com}  // namespace phonenumbers
13180d738a894e26aa958eacc62afbc70617b13dc83philip.liard@gmail.com}  // namespace i18n

#endif  // I18N_PHONENUMBERS_UNICODESTRING_H_