1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_split.h"
6
7#include <stddef.h>
8
9#include "base/logging.h"
10#include "base/strings/string_util.h"
11#include "base/third_party/icu/icu_utf.h"
12
13namespace base {
14
15namespace {
16
17// PieceToOutputType converts a StringPiece as needed to a given output type,
18// which is either the same type of StringPiece (a NOP) or the corresponding
19// non-piece string type.
20//
21// The default converter is a NOP, it works when the OutputType is the
22// correct StringPiece.
23template<typename Str, typename OutputType>
24OutputType PieceToOutputType(BasicStringPiece<Str> piece) {
25  return piece;
26}
27template<>  // Convert StringPiece to std::string
28std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {
29  return piece.as_string();
30}
31template<>  // Convert StringPiece16 to string16.
32string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {
33  return piece.as_string();
34}
35
36// Returns either the ASCII or UTF-16 whitespace.
37template<typename Str> BasicStringPiece<Str> WhitespaceForType();
38template<> StringPiece16 WhitespaceForType<string16>() {
39  return kWhitespaceUTF16;
40}
41template<> StringPiece WhitespaceForType<std::string>() {
42  return kWhitespaceASCII;
43}
44
45// Optimize the single-character case to call find() on the string instead,
46// since this is the common case and can be made faster. This could have been
47// done with template specialization too, but would have been less clear.
48//
49// There is no corresponding FindFirstNotOf because StringPiece already
50// implements these different versions that do the optimized searching.
51size_t FindFirstOf(StringPiece piece, char c, size_t pos) {
52  return piece.find(c, pos);
53}
54size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {
55  return piece.find(c, pos);
56}
57size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {
58  return piece.find_first_of(one_of, pos);
59}
60size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {
61  return piece.find_first_of(one_of, pos);
62}
63
64// General string splitter template. Can take 8- or 16-bit input, can produce
65// the corresponding string or StringPiece output, and can take single- or
66// multiple-character delimiters.
67//
68// DelimiterType is either a character (Str::value_type) or a string piece of
69// multiple characters (BasicStringPiece<Str>). StringPiece has a version of
70// find for both of these cases, and the single-character version is the most
71// common and can be implemented faster, which is why this is a template.
72template<typename Str, typename OutputStringType, typename DelimiterType>
73static std::vector<OutputStringType> SplitStringT(
74    BasicStringPiece<Str> str,
75    DelimiterType delimiter,
76    WhitespaceHandling whitespace,
77    SplitResult result_type) {
78  std::vector<OutputStringType> result;
79  if (str.empty())
80    return result;
81
82  size_t start = 0;
83  while (start != Str::npos) {
84    size_t end = FindFirstOf(str, delimiter, start);
85
86    BasicStringPiece<Str> piece;
87    if (end == Str::npos) {
88      piece = str.substr(start);
89      start = Str::npos;
90    } else {
91      piece = str.substr(start, end - start);
92      start = end + 1;
93    }
94
95    if (whitespace == TRIM_WHITESPACE)
96      piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
97
98    if (result_type == SPLIT_WANT_ALL || !piece.empty())
99      result.push_back(PieceToOutputType<Str, OutputStringType>(piece));
100  }
101  return result;
102}
103
104bool AppendStringKeyValue(StringPiece input,
105                          char delimiter,
106                          StringPairs* result) {
107  // Always append a new item regardless of success (it might be empty). The
108  // below code will copy the strings directly into the result pair.
109  result->resize(result->size() + 1);
110  auto& result_pair = result->back();
111
112  // Find the delimiter.
113  size_t end_key_pos = input.find_first_of(delimiter);
114  if (end_key_pos == std::string::npos) {
115    DVLOG(1) << "cannot find delimiter in: " << input;
116    return false;    // No delimiter.
117  }
118  input.substr(0, end_key_pos).CopyToString(&result_pair.first);
119
120  // Find the value string.
121  StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos);
122  size_t begin_value_pos = remains.find_first_not_of(delimiter);
123  if (begin_value_pos == StringPiece::npos) {
124    DVLOG(1) << "cannot parse value from input: " << input;
125    return false;   // No value.
126  }
127  remains.substr(begin_value_pos, remains.size() - begin_value_pos)
128      .CopyToString(&result_pair.second);
129
130  return true;
131}
132
133template <typename Str, typename OutputStringType>
134void SplitStringUsingSubstrT(BasicStringPiece<Str> input,
135                             BasicStringPiece<Str> delimiter,
136                             WhitespaceHandling whitespace,
137                             SplitResult result_type,
138                             std::vector<OutputStringType>* result) {
139  using Piece = BasicStringPiece<Str>;
140  using size_type = typename Piece::size_type;
141
142  result->clear();
143  for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
144       begin_index = end_index + delimiter.size()) {
145    end_index = input.find(delimiter, begin_index);
146    Piece term = end_index == Piece::npos
147                     ? input.substr(begin_index)
148                     : input.substr(begin_index, end_index - begin_index);
149
150    if (whitespace == TRIM_WHITESPACE)
151      term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
152
153    if (result_type == SPLIT_WANT_ALL || !term.empty())
154      result->push_back(PieceToOutputType<Str, OutputStringType>(term));
155  }
156}
157
158}  // namespace
159
160std::vector<std::string> SplitString(StringPiece input,
161                                     StringPiece separators,
162                                     WhitespaceHandling whitespace,
163                                     SplitResult result_type) {
164  if (separators.size() == 1) {
165    return SplitStringT<std::string, std::string, char>(
166        input, separators[0], whitespace, result_type);
167  }
168  return SplitStringT<std::string, std::string, StringPiece>(
169      input, separators, whitespace, result_type);
170}
171
172std::vector<string16> SplitString(StringPiece16 input,
173                                  StringPiece16 separators,
174                                  WhitespaceHandling whitespace,
175                                  SplitResult result_type) {
176  if (separators.size() == 1) {
177    return SplitStringT<string16, string16, char16>(
178        input, separators[0], whitespace, result_type);
179  }
180  return SplitStringT<string16, string16, StringPiece16>(
181      input, separators, whitespace, result_type);
182}
183
184std::vector<StringPiece> SplitStringPiece(StringPiece input,
185                                          StringPiece separators,
186                                          WhitespaceHandling whitespace,
187                                          SplitResult result_type) {
188  if (separators.size() == 1) {
189    return SplitStringT<std::string, StringPiece, char>(
190        input, separators[0], whitespace, result_type);
191  }
192  return SplitStringT<std::string, StringPiece, StringPiece>(
193      input, separators, whitespace, result_type);
194}
195
196std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
197                                            StringPiece16 separators,
198                                            WhitespaceHandling whitespace,
199                                            SplitResult result_type) {
200  if (separators.size() == 1) {
201    return SplitStringT<string16, StringPiece16, char16>(
202        input, separators[0], whitespace, result_type);
203  }
204  return SplitStringT<string16, StringPiece16, StringPiece16>(
205      input, separators, whitespace, result_type);
206}
207
208bool SplitStringIntoKeyValuePairs(StringPiece input,
209                                  char key_value_delimiter,
210                                  char key_value_pair_delimiter,
211                                  StringPairs* key_value_pairs) {
212  key_value_pairs->clear();
213
214  std::vector<StringPiece> pairs = SplitStringPiece(
215      input, std::string(1, key_value_pair_delimiter),
216      TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
217  key_value_pairs->reserve(pairs.size());
218
219  bool success = true;
220  for (const StringPiece& pair : pairs) {
221    if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) {
222      // Don't return here, to allow for pairs without associated
223      // value or key; just record that the split failed.
224      success = false;
225    }
226  }
227  return success;
228}
229
230void SplitStringUsingSubstr(StringPiece16 input,
231                            StringPiece16 delimiter,
232                            std::vector<string16>* result) {
233  SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL,
234                          result);
235}
236
237void SplitStringUsingSubstr(StringPiece input,
238                            StringPiece delimiter,
239                            std::vector<std::string>* result) {
240  SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL,
241                          result);
242}
243
244std::vector<StringPiece16> SplitStringPieceUsingSubstr(
245    StringPiece16 input,
246    StringPiece16 delimiter,
247    WhitespaceHandling whitespace,
248    SplitResult result_type) {
249  std::vector<StringPiece16> result;
250  SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
251  return result;
252}
253
254std::vector<StringPiece> SplitStringPieceUsingSubstr(
255    StringPiece input,
256    StringPiece delimiter,
257    WhitespaceHandling whitespace,
258    SplitResult result_type) {
259  std::vector<StringPiece> result;
260  SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
261  return result;
262}
263
264}  // namespace base
265