// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_STRING_TOKENIZER_H_
#define BASE_STRING_TOKENIZER_H_
#pragma once

#include <algorithm>
#include <string>

#include "base/string_piece.h"

// StringTokenizerT is a simple string tokenizer class.  It works like an
// iterator that with each step (see the GetNext method) updates members that
// refer to the next token in the input string.  The user may optionally
// configure the tokenizer to return delimiters.
//
// Warning: be careful not to pass a C string into the 2-arg constructor:
// StringTokenizer t("this is a test", " ");  // WRONG
// This will create a temporary std::string, save the begin() and end()
// iterators, and then the string will be freed before we actually start
// tokenizing it.
// Instead, use a std::string or use the 3 arg constructor of CStringTokenizer.
//
//
// EXAMPLE 1:
//
//   char input[] = "this is a test";
//   CStringTokenizer t(input, input + strlen(input), " ");
//   while (t.GetNext()) {
//     printf("%s\n", t.token().c_str());
//   }
//
// Output:
//
//   this
//   is
//   a
//   test
//
//
// EXAMPLE 2:
//
//   std::string input = "no-cache=\"foo, bar\", private";
//   StringTokenizer t(input, ", ");
//   t.set_quote_chars("\"");
//   while (t.GetNext()) {
//     printf("%s\n", t.token().c_str());
//   }
//
// Output:
//
//   no-cache="foo, bar"
//   private
//
//
// EXAMPLE 3:
//
//   bool next_is_option = false, next_is_value = false;
//   std::string input = "text/html; charset=UTF-8; foo=bar";
//   StringTokenizer t(input, "; =");
//   t.set_options(StringTokenizer::RETURN_DELIMS);
//   while (t.GetNext()) {
//     if (t.token_is_delim()) {
//       switch (*t.token_begin()) {
//         case ';':
68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// next_is_option = true; 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// break; 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// case '=': 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// next_is_value = true; 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// break; 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } 74c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } else { 75c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// const char* label; 76c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// if (next_is_option) { 77c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// label = "option-name"; 78c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// next_is_option = false; 79c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } else if (next_is_value) { 80c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// label = "option-value"; 81c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// next_is_value = false; 82c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } else { 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// label = "mime-type"; 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// printf("%s: %s\n", label, t.token().c_str()); 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// } 88c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 90c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate <class str, class const_iterator> 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass StringTokenizerT { 92c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott typedef typename str::value_type char_type; 94c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 
95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Options that may be pass to set_options() 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott enum { 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Specifies the delimiters should be returned as tokens 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott RETURN_DELIMS = 1 << 0, 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott }; 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The string object must live longer than the tokenizer. (In particular this 102c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // should not be constructed with a temporary.) 103c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott StringTokenizerT(const str& string, 104c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const str& delims) { 105c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Init(string.begin(), string.end(), delims); 106c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 107c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 108c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott StringTokenizerT(const_iterator string_begin, 109c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator string_end, 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const str& delims) { 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Init(string_begin, string_end, delims); 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Set the options for this tokenizer. By default, this is 0. 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void set_options(int options) { options_ = options; } 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Set the characters to regard as quotes. By default, this is empty. 
When 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // a quote char is encountered, the tokenizer will switch into a mode where 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // it ignores delimiters that it finds. It switches out of this mode once it 120c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // finds another instance of the quote char. If a backslash is encountered 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // within a quoted string, then the next character is skipped. 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void set_quote_chars(const str& quotes) { quotes_ = quotes; } 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Call this method to advance the tokenizer to the next delimiter. This 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // returns false if the tokenizer is complete. This method must be called 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // before calling any of the token* methods. 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool GetNext() { 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (quotes_.empty() && options_ == 0) 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return QuickGetNext(); 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch else 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return FullGetNext(); 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 134c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Start iterating through tokens from the beginning of the string. 
135c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void Reset() { 136c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott token_end_ = start_pos_; 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Returns true if token is a delimiter. When the tokenizer is constructed 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // with the RETURN_DELIMS option, this method can be used to check if the 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // returned token is actually a delimiter. 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool token_is_delim() const { return token_is_delim_; } 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // If GetNext() returned true, then these methods may be used to read the 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // value of the token. 
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator token_begin() const { return token_begin_; } 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator token_end() const { return token_end_; } 148c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott str token() const { return str(token_begin_, token_end_); } 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::StringPiece token_piece() const { 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return base::StringPiece(&*token_begin_, 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::distance(token_begin_, token_end_)); 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott void Init(const_iterator string_begin, 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator string_end, 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const str& delims) { 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott start_pos_ = string_begin; 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_begin_ = string_begin; 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott token_end_ = string_begin; 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott end_ = string_end; 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott delims_ = delims; 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott options_ = 0; 1643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick token_is_delim_ = false; 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Implementation of GetNext() for when we have no quote characters. 
We have 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // two separate implementations because AdvanceOne() is a hot spot in large 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // text files with large tokens. 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool QuickGetNext() { 171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_is_delim_ = false; 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (;;) { 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_begin_ = token_end_; 174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (token_end_ == end_) 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++token_end_; 177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (delims_.find(*token_begin_) == str::npos) 178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 1793345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // else skip over delimiter. 180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (token_end_ != end_ && delims_.find(*token_end_) == str::npos) 182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++token_end_; 183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return true; 184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Implementation of GetNext() for when we have to take quotes into account. 
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool FullGetNext() { 188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch AdvanceState state; 189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_is_delim_ = false; 190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (;;) { 191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_begin_ = token_end_; 192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (token_end_ == end_) 193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++token_end_; 195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (AdvanceOne(&state, *token_begin_)) 196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (options_ & RETURN_DELIMS) { 198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch token_is_delim_ = true; 199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return true; 200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 2013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // else skip over delimiter. 
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) 204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++token_end_; 205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return true; 206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 208c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool IsDelim(char_type c) const { 209c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return delims_.find(c) != str::npos; 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool IsQuote(char_type c) const { 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return quotes_.find(c) != str::npos; 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott struct AdvanceState { 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool in_quote; 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool in_escape; 219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char_type quote_char; 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott AdvanceState() : in_quote(false), in_escape(false) {} 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott }; 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Returns true if a delimiter was not hit. 
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool AdvanceOne(AdvanceState* state, char_type c) { 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (state->in_quote) { 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (state->in_escape) { 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott state->in_escape = false; 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else if (c == '\\') { 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott state->in_escape = true; 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else if (c == state->quote_char) { 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott state->in_quote = false; 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (IsDelim(c)) 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return false; 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott state->in_quote = IsQuote(state->quote_char = c); 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return true; 239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator start_pos_; 242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator token_begin_; 243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator token_end_; 244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const_iterator end_; 245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott str delims_; 246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott str quotes_; 247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott int options_; 248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool token_is_delim_; 249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 
250c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 251c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::string, std::string::const_iterator> 252c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott StringTokenizer; 253c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::wstring, std::wstring::const_iterator> 254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott WStringTokenizer; 255c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttypedef StringTokenizerT<std::string, const char*> CStringTokenizer; 256c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 257c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#endif // BASE_STRING_TOKENIZER_H_ 258