15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#ifndef BASE_STRINGS_STRING_TOKENIZER_H_
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#define BASE_STRINGS_STRING_TOKENIZER_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/strings/string_piece.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace base {
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
// StringTokenizerT is a simple string tokenizer class.  It works like an
// iterator that with each step (see the GetNext method) updates members that
// refer to the next token in the input string.  The user may optionally
// configure the tokenizer to return delimiters.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Warning: be careful not to pass a C string into the 2-arg constructor:
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StringTokenizer t("this is a test", " ");  // WRONG
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This will create a temporary std::string, save the begin() and end()
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// iterators, and then the string will be freed before we actually start
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// tokenizing it.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Instead, use a std::string or use the 3 arg constructor of CStringTokenizer.
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 1:
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   char input[] = "this is a test";
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   CStringTokenizer t(input, input + strlen(input), " ");
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   while (t.GetNext()) {
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     printf("%s\n", t.token().c_str());
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   }
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Output:
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   this
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   is
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   a
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   test
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 2:
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   std::string input = "no-cache=\"foo, bar\", private";
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   StringTokenizer t(input, ", ");
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   t.set_quote_chars("\"");
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   while (t.GetNext()) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     printf("%s\n", t.token().c_str());
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   }
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Output:
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   no-cache="foo, bar"
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   private
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EXAMPLE 3:
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   bool next_is_option = false, next_is_value = false;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   std::string input = "text/html; charset=UTF-8; foo=bar";
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   StringTokenizer t(input, "; =");
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   t.set_options(StringTokenizer::RETURN_DELIMS);
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   while (t.GetNext()) {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     if (t.token_is_delim()) {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       switch (*t.token_begin()) {
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         case ';':
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//           next_is_option = true;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//           break;
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         case '=':
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//           next_is_value = true;
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//           break;
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       }
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     } else {
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       const char* label;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       if (next_is_option) {
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         label = "option-name";
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         next_is_option = false;
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       } else if (next_is_value) {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         label = "option-value";
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         next_is_value = false;
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       } else {
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//         label = "mime-type";
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       }
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//       printf("%s: %s\n", label, t.token().c_str());
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//     }
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   }
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)template <class str, class const_iterator>
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class StringTokenizerT {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef typename str::value_type char_type;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Options that may be pass to set_options()
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum {
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Specifies the delimiters should be returned as tokens
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    RETURN_DELIMS = 1 << 0,
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The string object must live longer than the tokenizer.  (In particular this
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // should not be constructed with a temporary.)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringTokenizerT(const str& string,
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const str& delims) {
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Init(string.begin(), string.end(), delims);
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  StringTokenizerT(const_iterator string_begin,
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const_iterator string_end,
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                   const str& delims) {
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Init(string_begin, string_end, delims);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set the options for this tokenizer.  By default, this is 0.
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_options(int options) { options_ = options; }
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set the characters to regard as quotes.  By default, this is empty.  When
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a quote char is encountered, the tokenizer will switch into a mode where
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // it ignores delimiters that it finds.  It switches out of this mode once it
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // finds another instance of the quote char.  If a backslash is encountered
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // within a quoted string, then the next character is skipped.
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_quote_chars(const str& quotes) { quotes_ = quotes; }
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Call this method to advance the tokenizer to the next delimiter.  This
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // returns false if the tokenizer is complete.  This method must be called
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // before calling any of the token* methods.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool GetNext() {
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (quotes_.empty() && options_ == 0)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return QuickGetNext();
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    else
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return FullGetNext();
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Start iterating through tokens from the beginning of the string.
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Reset() {
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_end_ = start_pos_;
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if token is a delimiter.  When the tokenizer is constructed
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // with the RETURN_DELIMS option, this method can be used to check if the
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // returned token is actually a delimiter.
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool token_is_delim() const { return token_is_delim_; }
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If GetNext() returned true, then these methods may be used to read the
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // value of the token.
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator token_begin() const { return token_begin_; }
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator token_end() const { return token_end_; }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str token() const { return str(token_begin_, token_end_); }
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::StringPiece token_piece() const {
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return base::StringPiece(&*token_begin_,
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             std::distance(token_begin_, token_end_));
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void Init(const_iterator string_begin,
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            const_iterator string_end,
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            const str& delims) {
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    start_pos_ = string_begin;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_begin_ = string_begin;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_end_ = string_begin;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    end_ = string_end;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delims_ = delims;
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    options_ = 0;
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_is_delim_ = false;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Implementation of GetNext() for when we have no quote characters. We have
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // two separate implementations because AdvanceOne() is a hot spot in large
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // text files with large tokens.
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool QuickGetNext() {
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_is_delim_ = false;
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (;;) {
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      token_begin_ = token_end_;
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (token_end_ == end_)
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++token_end_;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (delims_.find(*token_begin_) == str::npos)
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // else skip over delimiter.
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (token_end_ != end_ && delims_.find(*token_end_) == str::npos)
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++token_end_;
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Implementation of GetNext() for when we have to take quotes into account.
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool FullGetNext() {
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AdvanceState state;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    token_is_delim_ = false;
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (;;) {
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      token_begin_ = token_end_;
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (token_end_ == end_)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++token_end_;
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (AdvanceOne(&state, *token_begin_))
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (options_ & RETURN_DELIMS) {
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        token_is_delim_ = true;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return true;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // else skip over delimiter.
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (token_end_ != end_ && AdvanceOne(&state, *token_end_))
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++token_end_;
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool IsDelim(char_type c) const {
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return delims_.find(c) != str::npos;
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool IsQuote(char_type c) const {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return quotes_.find(c) != str::npos;
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct AdvanceState {
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool in_quote;
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool in_escape;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char_type quote_char;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AdvanceState() : in_quote(false), in_escape(false), quote_char('\0') {}
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns true if a delimiter was not hit.
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool AdvanceOne(AdvanceState* state, char_type c) {
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (state->in_quote) {
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (state->in_escape) {
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        state->in_escape = false;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if (c == '\\') {
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        state->in_escape = true;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if (c == state->quote_char) {
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        state->in_quote = false;
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (IsDelim(c))
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return false;
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      state->in_quote = IsQuote(state->quote_char = c);
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator start_pos_;
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator token_begin_;
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator token_end_;
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const_iterator end_;
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str delims_;
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  str quotes_;
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int options_;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool token_is_delim_;
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Tokenizer over a std::string, walked with std::string::const_iterator.
typedef StringTokenizerT<std::string, std::string::const_iterator>
    StringTokenizer;
// Wide-character variant: tokenizer over a std::wstring.
typedef StringTokenizerT<std::wstring, std::wstring::const_iterator>
    WStringTokenizer;
// Tokenizer over a raw character range given as const char* iterators;
// delimiters, quote chars and token() still use std::string.
typedef StringTokenizerT<std::string, const char*> CStringTokenizer;
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}  // namespace base
2592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#endif  // BASE_STRINGS_STRING_TOKENIZER_H_
261