// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef BASE_I18N_CHAR_ITERATOR_H_
6#define BASE_I18N_CHAR_ITERATOR_H_
7
8#include <string>
9
10#include "base/basictypes.h"
11#include "base/i18n/base_i18n_export.h"
12#include "base/strings/string16.h"
13
// The CharIterator classes iterate through the characters in UTF8 and
// UTF16 strings.  Example usage:
//
//   UTF8CharIterator iter(&str);
//   while (!iter.end()) {
//     VLOG(1) << iter.get();
//     iter.Advance();
//   }
22
// Fallback definition of uint8_t for Windows builds (OS_WIN).
// NOTE(review): presumably needed because the Windows toolchain in use does
// not ship <stdint.h> -- confirm before removing or replacing this typedef.
#if defined(OS_WIN)
typedef unsigned char uint8_t;
#endif
26
27namespace base {
28namespace i18n {
29
30class BASE_I18N_EXPORT UTF8CharIterator {
31 public:
32  // Requires |str| to live as long as the UTF8CharIterator does.
33  explicit UTF8CharIterator(const std::string* str);
34  ~UTF8CharIterator();
35
36  // Return the starting array index of the current character within the
37  // string.
38  int32 array_pos() const { return array_pos_; }
39
40  // Return the logical index of the current character, independent of the
41  // number of bytes each character takes.
42  int32 char_pos() const { return char_pos_; }
43
44  // Return the current char.
45  int32 get() const { return char_; }
46
47  // Returns true if we're at the end of the string.
48  bool end() const { return array_pos_ == len_; }
49
50  // Advance to the next actual character.  Returns false if we're at the
51  // end of the string.
52  bool Advance();
53
54 private:
55  // The string we're iterating over.
56  const uint8_t* str_;
57
58  // The length of the encoded string.
59  int32 len_;
60
61  // Array index.
62  int32 array_pos_;
63
64  // The next array index.
65  int32 next_pos_;
66
67  // Character index.
68  int32 char_pos_;
69
70  // The current character.
71  int32 char_;
72
73  DISALLOW_COPY_AND_ASSIGN(UTF8CharIterator);
74};
75
76class BASE_I18N_EXPORT UTF16CharIterator {
77 public:
78  // Requires |str| to live as long as the UTF16CharIterator does.
79  explicit UTF16CharIterator(const string16* str);
80  UTF16CharIterator(const char16* str, size_t str_len);
81  ~UTF16CharIterator();
82
83  // Return the starting array index of the current character within the
84  // string.
85  int32 array_pos() const { return array_pos_; }
86
87  // Return the logical index of the current character, independent of the
88  // number of codewords each character takes.
89  int32 char_pos() const { return char_pos_; }
90
91  // Return the current char.
92  int32 get() const { return char_; }
93
94  // Returns true if we're at the end of the string.
95  bool end() const { return array_pos_ == len_; }
96
97  // Advance to the next actual character.  Returns false if we're at the
98  // end of the string.
99  bool Advance();
100
101 private:
102  // Fills in the current character we found and advances to the next
103  // character, updating all flags as necessary.
104  void ReadChar();
105
106  // The string we're iterating over.
107  const char16* str_;
108
109  // The length of the encoded string.
110  int32 len_;
111
112  // Array index.
113  int32 array_pos_;
114
115  // The next array index.
116  int32 next_pos_;
117
118  // Character index.
119  int32 char_pos_;
120
121  // The current character.
122  int32 char_;
123
124  DISALLOW_COPY_AND_ASSIGN(UTF16CharIterator);
125};
126
127}  // namespace i18n
128}  // namespace base
129
130#endif  // BASE_I18N_CHAR_ITERATOR_H_
131