// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef BASE_I18N_CHAR_ITERATOR_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define BASE_I18N_CHAR_ITERATOR_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/base_i18n_export.h"
12868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string16.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// The CharIterator classes iterate through the characters in UTF8 and
// UTF16 strings.  Example usage:
//
//   UTF8CharIterator iter(&str);
//   while (!iter.end()) {
//     VLOG(1) << iter.get();
//     iter.Advance();
//   }
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// uint8_t is the element type of UTF8CharIterator::str_ below.  When OS_WIN
// is defined it is declared here as unsigned char.
// NOTE(review): presumably this exists because the Windows toolchain in use
// did not supply <stdint.h>; confirm before removing.
#if defined(OS_WIN)
typedef unsigned char uint8_t;
#endif
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace i18n {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_I18N_EXPORT UTF8CharIterator {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Requires |str| to live as long as the UTF8CharIterator does.
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  explicit UTF8CharIterator(const std::string* str);
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~UTF8CharIterator();
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the starting array index of the current character within the
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string.
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 array_pos() const { return array_pos_; }
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the logical index of the current character, independent of the
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // number of bytes each character takes.
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_pos() const { return char_pos_; }
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the current char.
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 get() const { return char_; }
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if we're at the end of the string.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool end() const { return array_pos_ == len_; }
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Advance to the next actual character.  Returns false if we're at the
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // end of the string.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Advance();
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The string we're iterating over.
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint8_t* str_;
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The length of the encoded string.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 len_;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Array index.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 array_pos_;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The next array index.
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 next_pos_;
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Character index.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_pos_;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The current character.
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BASE_I18N_EXPORT UTF16CharIterator {
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Requires |str| to live as long as the UTF16CharIterator does.
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  explicit UTF16CharIterator(const string16* str);
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UTF16CharIterator(const char16* str, size_t str_len);
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~UTF16CharIterator();
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the starting array index of the current character within the
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string.
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 array_pos() const { return array_pos_; }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the logical index of the current character, independent of the
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // number of codewords each character takes.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_pos() const { return char_pos_; }
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the current char.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 get() const { return char_; }
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if we're at the end of the string.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool end() const { return array_pos_ == len_; }
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Advance to the next actual character.  Returns false if we're at the
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // end of the string.
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Advance();
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Fills in the current character we found and advances to the next
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // character, updating all flags as necessary.
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void ReadChar();
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The string we're iterating over.
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char16* str_;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The length of the encoded string.
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 len_;
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Array index.
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 array_pos_;
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The next array index.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 next_pos_;
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Character index.
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_pos_;
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The current character.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int32 char_;
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace i18n
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace base
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // BASE_I18N_CHAR_ITERATOR_H_
131