13345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#ifndef BASE_STRING_TOKENIZER_H_
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#define BASE_STRING_TOKENIZER_H_
73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
8c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <algorithm>
10c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include <string>
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_piece.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// StringTokenizerT is a simple string tokenizer class.  It works like an
// iterator that with each step (see the GetNext method) updates members that
// refer to the next token in the input string.  The user may optionally
// configure the tokenizer to return delimiters.
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Warning: be careful not to pass a C string into the 2-arg constructor:
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// StringTokenizer t("this is a test", " ");  // WRONG
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// This will create a temporary std::string, save the begin() and end()
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// iterators, and then the string will be freed before we actually start
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// tokenizing it.
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Instead, use a std::string or use the 3 arg constructor of CStringTokenizer.
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// EXAMPLE 1:
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   char input[] = "this is a test";
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   CStringTokenizer t(input, input + strlen(input), " ");
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   while (t.GetNext()) {
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     printf("%s\n", t.token().c_str());
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   }
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Output:
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   this
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   is
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   a
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   test
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// EXAMPLE 2:
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   std::string input = "no-cache=\"foo, bar\", private";
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   StringTokenizer t(input, ", ");
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   t.set_quote_chars("\"");
48c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   while (t.GetNext()) {
49c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     printf("%s\n", t.token().c_str());
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   }
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Output:
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   no-cache="foo, bar"
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   private
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// EXAMPLE 3:
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   bool next_is_option = false, next_is_value = false;
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   std::string input = "text/html; charset=UTF-8; foo=bar";
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   StringTokenizer t(input, "; =");
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   t.set_options(StringTokenizer::RETURN_DELIMS);
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   while (t.GetNext()) {
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     if (t.token_is_delim()) {
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       switch (*t.token_begin()) {
67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         case ';':
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//           next_is_option = true;
69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//           break;
70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         case '=':
71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//           next_is_value = true;
72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//           break;
73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       }
74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     } else {
75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       const char* label;
76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       if (next_is_option) {
77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         label = "option-name";
78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         next_is_option = false;
79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       } else if (next_is_value) {
80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         label = "option-value";
81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         next_is_value = false;
82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       } else {
83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//         label = "mime-type";
84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       }
85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//       printf("%s: %s\n", label, t.token().c_str());
86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//     }
87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//   }
88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate <class str, class const_iterator>
91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass StringTokenizerT {
92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  typedef typename str::value_type char_type;
94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Options that may be pass to set_options()
96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  enum {
97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    // Specifies the delimiters should be returned as tokens
98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    RETURN_DELIMS = 1 << 0,
99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The string object must live longer than the tokenizer.  (In particular this
102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // should not be constructed with a temporary.)
103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  StringTokenizerT(const str& string,
104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   const str& delims) {
105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    Init(string.begin(), string.end(), delims);
106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  StringTokenizerT(const_iterator string_begin,
109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   const_iterator string_end,
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                   const str& delims) {
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    Init(string_begin, string_end, delims);
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Set the options for this tokenizer.  By default, this is 0.
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void set_options(int options) { options_ = options; }
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Set the characters to regard as quotes.  By default, this is empty.  When
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // a quote char is encountered, the tokenizer will switch into a mode where
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // it ignores delimiters that it finds.  It switches out of this mode once it
120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // finds another instance of the quote char.  If a backslash is encountered
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // within a quoted string, then the next character is skipped.
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void set_quote_chars(const str& quotes) { quotes_ = quotes; }
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Call this method to advance the tokenizer to the next delimiter.  This
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // returns false if the tokenizer is complete.  This method must be called
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // before calling any of the token* methods.
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool GetNext() {
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (quotes_.empty() && options_ == 0)
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return QuickGetNext();
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    else
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return FullGetNext();
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Start iterating through tokens from the beginning of the string.
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void Reset() {
136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    token_end_ = start_pos_;
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Returns true if token is a delimiter.  When the tokenizer is constructed
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // with the RETURN_DELIMS option, this method can be used to check if the
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // returned token is actually a delimiter.
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool token_is_delim() const { return token_is_delim_; }
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // If GetNext() returned true, then these methods may be used to read the
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // value of the token.
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator token_begin() const { return token_begin_; }
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator token_end() const { return token_end_; }
148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  str token() const { return str(token_begin_, token_end_); }
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  base::StringPiece token_piece() const {
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return base::StringPiece(&*token_begin_,
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                             std::distance(token_begin_, token_end_));
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  void Init(const_iterator string_begin,
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const_iterator string_end,
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            const str& delims) {
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    start_pos_ = string_begin;
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    token_begin_ = string_begin;
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    token_end_ = string_begin;
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    end_ = string_end;
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    delims_ = delims;
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    options_ = 0;
1643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    token_is_delim_ = false;
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Implementation of GetNext() for when we have no quote characters. We have
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // two separate implementations because AdvanceOne() is a hot spot in large
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // text files with large tokens.
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool QuickGetNext() {
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    token_is_delim_ = false;
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (;;) {
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      token_begin_ = token_end_;
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (token_end_ == end_)
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        return false;
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++token_end_;
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (delims_.find(*token_begin_) == str::npos)
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
1793345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // else skip over delimiter.
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++token_end_;
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return true;
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Implementation of GetNext() for when we have to take quotes into account.
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool FullGetNext() {
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    AdvanceState state;
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    token_is_delim_ = false;
190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (;;) {
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      token_begin_ = token_end_;
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (token_end_ == end_)
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        return false;
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++token_end_;
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (AdvanceOne(&state, *token_begin_))
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (options_ & RETURN_DELIMS) {
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        token_is_delim_ = true;
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        return true;
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
2013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // else skip over delimiter.
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++token_end_;
205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return true;
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool IsDelim(char_type c) const {
209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return delims_.find(c) != str::npos;
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool IsQuote(char_type c) const {
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return quotes_.find(c) != str::npos;
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  struct AdvanceState {
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool in_quote;
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    bool in_escape;
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char_type quote_char;
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    AdvanceState() : in_quote(false), in_escape(false) {}
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Returns true if a delimiter was not hit.
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool AdvanceOne(AdvanceState* state, char_type c) {
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (state->in_quote) {
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      if (state->in_escape) {
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        state->in_escape = false;
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      } else if (c == '\\') {
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        state->in_escape = true;
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      } else if (c == state->quote_char) {
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        state->in_quote = false;
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      }
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      if (IsDelim(c))
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        return false;
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      state->in_quote = IsQuote(state->quote_char = c);
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return true;
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator start_pos_;
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator token_begin_;
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator token_end_;
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  const_iterator end_;
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  str delims_;
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  str quotes_;
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  int options_;
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool token_is_delim_;
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::string, std::string::const_iterator>
252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    StringTokenizer;
253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::wstring, std::wstring::const_iterator>
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    WStringTokenizer;
255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::string, const char*> CStringTokenizer;
256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif  // BASE_STRING_TOKENIZER_H_
258