15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifndef BASE_STRINGS_STRING_TOKENIZER_H_ 62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#define BASE_STRINGS_STRING_TOKENIZER_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace base { 142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StringTokenizerT is a simple string tokenizer class. It works like an 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// iterator that with each step (see the Advance method) updates members that 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// refer to the next token in the input string. The user may optionally 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// configure the tokenizer to return delimiters. 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Warning: be careful not to pass a C string into the 2-arg constructor: 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StringTokenizer t("this is a test", " "); // WRONG 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This will create a temporary std::string, save the begin() and end() 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// iterators, and then the string will be freed before we actually start 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// tokenizing it. 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Instead, use a std::string or use the 3 arg constructor of CStringTokenizer. 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 1: 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// char input[] = "this is a test"; 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// CStringTokenizer t(input, input + strlen(input), " "); 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// while (t.GetNext()) { 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// printf("%s\n", t.token().c_str()); 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Output: 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// this 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// is 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// test 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 2: 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// std::string input = "no-cache=\"foo, bar\", private"; 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StringTokenizer t(input, ", "); 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// t.set_quote_chars("\""); 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// while (t.GetNext()) { 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// printf("%s\n", t.token().c_str()); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Output: 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// no-cache="foo, bar" 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// private 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 3: 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// bool next_is_option = false, next_is_value = false; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// std::string input = "text/html; charset=UTF-8; foo=bar"; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StringTokenizer t(input, "; ="); 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// t.set_options(StringTokenizer::RETURN_DELIMS); 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// while (t.GetNext()) { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// if (t.token_is_delim()) { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// switch (*t.token_begin()) { 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// case ';': 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// next_is_option = true; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// break; 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// case '=': 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// next_is_value = true; 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// break; 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } else { 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// const char* label; 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// if (next_is_option) { 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// label = "option-name"; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// next_is_option = false; 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } else if (next_is_value) { 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// label = "option-value"; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// next_is_value = false; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } else { 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// label = "mime-type"; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// printf("%s: %s\n", label, t.token().c_str()); 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// } 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <class str, class const_iterator> 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class StringTokenizerT { 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typedef typename str::value_type char_type; 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Options that may be pass to set_options() 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum { 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Specifies the delimiters should be returned as tokens 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) RETURN_DELIMS = 1 << 0, 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The string object must live longer than the tokenizer. (In particular this 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // should not be constructed with a temporary.) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringTokenizerT(const str& string, 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const str& delims) { 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(string.begin(), string.end(), delims); 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringTokenizerT(const_iterator string_begin, 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator string_end, 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const str& delims) { 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Init(string_begin, string_end, delims); 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set the options for this tokenizer. By default, this is 0. 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_options(int options) { options_ = options; } 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set the characters to regard as quotes. By default, this is empty. When 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a quote char is encountered, the tokenizer will switch into a mode where 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // it ignores delimiters that it finds. It switches out of this mode once it 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // finds another instance of the quote char. If a backslash is encountered 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // within a quoted string, then the next character is skipped. 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_quote_chars(const str& quotes) { quotes_ = quotes; } 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Call this method to advance the tokenizer to the next delimiter. This 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // returns false if the tokenizer is complete. This method must be called 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // before calling any of the token* methods. 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool GetNext() { 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (quotes_.empty() && options_ == 0) 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return QuickGetNext(); 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return FullGetNext(); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Start iterating through tokens from the beginning of the string. 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void Reset() { 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_end_ = start_pos_; 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if token is a delimiter. When the tokenizer is constructed 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with the RETURN_DELIMS option, this method can be used to check if the 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // returned token is actually a delimiter. 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool token_is_delim() const { return token_is_delim_; } 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If GetNext() returned true, then these methods may be used to read the 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // value of the token. 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator token_begin() const { return token_begin_; } 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator token_end() const { return token_end_; } 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str token() const { return str(token_begin_, token_end_); } 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::StringPiece token_piece() const { 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return base::StringPiece(&*token_begin_, 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::distance(token_begin_, token_end_)); 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void Init(const_iterator string_begin, 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator string_end, 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const str& delims) { 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start_pos_ = string_begin; 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_begin_ = string_begin; 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_end_ = string_begin; 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) end_ = string_end; 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) delims_ = delims; 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) options_ = 0; 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_is_delim_ = false; 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Implementation of GetNext() for when we have no quote characters. We have 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // two separate implementations because AdvanceOne() is a hot spot in large 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // text files with large tokens. 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool QuickGetNext() { 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_is_delim_ = false; 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (;;) { 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_begin_ = token_end_; 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (token_end_ == end_) 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++token_end_; 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (delims_.find(*token_begin_) == str::npos) 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // else skip over delimiter. 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (token_end_ != end_ && delims_.find(*token_end_) == str::npos) 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++token_end_; 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Implementation of GetNext() for when we have to take quotes into account. 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool FullGetNext() { 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AdvanceState state; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_is_delim_ = false; 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (;;) { 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_begin_ = token_end_; 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (token_end_ == end_) 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++token_end_; 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (AdvanceOne(&state, *token_begin_)) 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (options_ & RETURN_DELIMS) { 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) token_is_delim_ = true; 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // else skip over delimiter. 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (token_end_ != end_ && AdvanceOne(&state, *token_end_)) 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++token_end_; 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool IsDelim(char_type c) const { 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return delims_.find(c) != str::npos; 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool IsQuote(char_type c) const { 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return quotes_.find(c) != str::npos; 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct AdvanceState { 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool in_quote; 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool in_escape; 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char_type quote_char; 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {} 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns true if a delimiter was not hit. 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool AdvanceOne(AdvanceState* state, char_type c) { 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (state->in_quote) { 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (state->in_escape) { 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state->in_escape = false; 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (c == '\\') { 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state->in_escape = true; 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (c == state->quote_char) { 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state->in_quote = false; 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IsDelim(c)) 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state->in_quote = IsQuote(state->quote_char = c); 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator start_pos_; 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator token_begin_; 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator token_end_; 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const_iterator end_; 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str delims_; 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) str quotes_; 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int options_; 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool token_is_delim_; 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef StringTokenizerT<std::string, std::string::const_iterator> 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) StringTokenizer; 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef StringTokenizerT<std::wstring, std::wstring::const_iterator> 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) WStringTokenizer; 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef StringTokenizerT<std::string, const char*> CStringTokenizer; 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} // namespace base 2592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif // BASE_STRINGS_STRING_TOKENIZER_H_ 261