1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef BASE_I18N_CHAR_ITERATOR_H_ 6#define BASE_I18N_CHAR_ITERATOR_H_ 7 8#include <string> 9 10#include "base/basictypes.h" 11#include "base/i18n/base_i18n_export.h" 12#include "base/strings/string16.h" 13 14// The CharIterator classes iterate through the characters in UTF8 and 15// UTF16 strings. Example usage: 16// 17// UTF8CharIterator iter(&str); 18// while (!iter.End()) { 19// VLOG(1) << iter.get(); 20// iter.Advance(); 21// } 22 23#if defined(OS_WIN) 24typedef unsigned char uint8_t; 25#endif 26 27namespace base { 28namespace i18n { 29 30class BASE_I18N_EXPORT UTF8CharIterator { 31 public: 32 // Requires |str| to live as long as the UTF8CharIterator does. 33 explicit UTF8CharIterator(const std::string* str); 34 ~UTF8CharIterator(); 35 36 // Return the starting array index of the current character within the 37 // string. 38 int32 array_pos() const { return array_pos_; } 39 40 // Return the logical index of the current character, independent of the 41 // number of bytes each character takes. 42 int32 char_pos() const { return char_pos_; } 43 44 // Return the current char. 45 int32 get() const { return char_; } 46 47 // Returns true if we're at the end of the string. 48 bool end() const { return array_pos_ == len_; } 49 50 // Advance to the next actual character. Returns false if we're at the 51 // end of the string. 52 bool Advance(); 53 54 private: 55 // The string we're iterating over. 56 const uint8_t* str_; 57 58 // The length of the encoded string. 59 int32 len_; 60 61 // Array index. 62 int32 array_pos_; 63 64 // The next array index. 65 int32 next_pos_; 66 67 // Character index. 68 int32 char_pos_; 69 70 // The current character. 71 int32 char_; 72 73 DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); 74}; 75 76class BASE_I18N_EXPORT UTF16CharIterator { 77 public: 78 // Requires |str| to live as long as the UTF16CharIterator does. 79 explicit UTF16CharIterator(const string16* str); 80 UTF16CharIterator(const char16* str, size_t str_len); 81 ~UTF16CharIterator(); 82 83 // Return the starting array index of the current character within the 84 // string. 85 int32 array_pos() const { return array_pos_; } 86 87 // Return the logical index of the current character, independent of the 88 // number of codewords each character takes. 89 int32 char_pos() const { return char_pos_; } 90 91 // Return the current char. 92 int32 get() const { return char_; } 93 94 // Returns true if we're at the end of the string. 95 bool end() const { return array_pos_ == len_; } 96 97 // Advance to the next actual character. Returns false if we're at the 98 // end of the string. 99 bool Advance(); 100 101 private: 102 // Fills in the current character we found and advances to the next 103 // character, updating all flags as necessary. 104 void ReadChar(); 105 106 // The string we're iterating over. 107 const char16* str_; 108 109 // The length of the encoded string. 110 int32 len_; 111 112 // Array index. 113 int32 array_pos_; 114 115 // The next array index. 116 int32 next_pos_; 117 118 // Character index. 119 int32 char_pos_; 120 121 // The current character. 122 int32 char_; 123 124 DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); 125}; 126 127} // namespace i18n 128} // namespace base 129 130#endif // BASE_I18N_CHAR_ITERATOR_H_ 131