15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef BASE_I18N_CHAR_ITERATOR_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define BASE_I18N_CHAR_ITERATOR_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/base_i18n_export.h" 12868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string16.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The CharIterator classes iterate through the characters in UTF8 and 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF16 strings. Example usage: 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UTF8CharIterator iter(&str); 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// while (!iter.End()) { 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// VLOG(1) << iter.get(); 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// iter.Advance(); 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN) 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef unsigned char uint8_t; 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base { 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace i18n { 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_I18N_EXPORT UTF8CharIterator { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Requires |str| to live as long as the UTF8CharIterator does. 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) explicit UTF8CharIterator(const std::string* str); 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~UTF8CharIterator(); 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the starting array index of the current character within the 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string. 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 array_pos() const { return array_pos_; } 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the logical index of the current character, independent of the 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // number of bytes each character takes. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_pos() const { return char_pos_; } 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the current char. 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 get() const { return char_; } 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if we're at the end of the string. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool end() const { return array_pos_ == len_; } 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Advance to the next actual character. Returns false if we're at the 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // end of the string. 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Advance(); 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The string we're iterating over. 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const uint8_t* str_; 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The length of the encoded string. 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 len_; 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Array index. 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 array_pos_; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The next array index. 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 next_pos_; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Character index. 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_pos_; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The current character. 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_; 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator); 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_I18N_EXPORT UTF16CharIterator { 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Requires |str| to live as long as the UTF16CharIterator does. 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) explicit UTF16CharIterator(const string16* str); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UTF16CharIterator(const char16* str, size_t str_len); 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~UTF16CharIterator(); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the starting array index of the current character within the 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // string. 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 array_pos() const { return array_pos_; } 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the logical index of the current character, independent of the 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // number of codewords each character takes. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_pos() const { return char_pos_; } 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Return the current char. 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 get() const { return char_; } 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if we're at the end of the string. 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool end() const { return array_pos_ == len_; } 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Advance to the next actual character. Returns false if we're at the 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // end of the string. 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Advance(); 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Fills in the current character we found and advances to the next 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // character, updating all flags as necessary. 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void ReadChar(); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The string we're iterating over. 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const char16* str_; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The length of the encoded string. 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 len_; 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Array index. 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 array_pos_; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The next array index. 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 next_pos_; 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Character index. 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_pos_; 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The current character. 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int32 char_; 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator); 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace i18n 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace base 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // BASE_I18N_CHAR_ITERATOR_H_ 131