1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_split.h"
6
7#include "base/logging.h"
8#include "base/strings/string_util.h"
9#include "base/strings/utf_string_conversions.h"
10#include "base/third_party/icu/icu_utf.h"
11
12namespace base {
13
14namespace {
15
16template <typename STR>
17void SplitStringT(const STR& str,
18                  const typename STR::value_type s,
19                  bool trim_whitespace,
20                  std::vector<STR>* r) {
21  r->clear();
22  size_t last = 0;
23  size_t c = str.size();
24  for (size_t i = 0; i <= c; ++i) {
25    if (i == c || str[i] == s) {
26      STR tmp(str, last, i - last);
27      if (trim_whitespace)
28        TrimWhitespace(tmp, TRIM_ALL, &tmp);
29      // Avoid converting an empty or all-whitespace source string into a vector
30      // of one empty string.
31      if (i != c || !r->empty() || !tmp.empty())
32        r->push_back(tmp);
33      last = i + 1;
34    }
35  }
36}
37
38bool SplitStringIntoKeyValue(const std::string& line,
39                             char key_value_delimiter,
40                             std::string* key,
41                             std::string* value) {
42  key->clear();
43  value->clear();
44
45  // Find the delimiter.
46  size_t end_key_pos = line.find_first_of(key_value_delimiter);
47  if (end_key_pos == std::string::npos) {
48    DVLOG(1) << "cannot find delimiter in: " << line;
49    return false;    // no delimiter
50  }
51  key->assign(line, 0, end_key_pos);
52
53  // Find the value string.
54  std::string remains(line, end_key_pos, line.size() - end_key_pos);
55  size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
56  if (begin_value_pos == std::string::npos) {
57    DVLOG(1) << "cannot parse value from line: " << line;
58    return false;   // no value
59  }
60  value->assign(remains, begin_value_pos, remains.size() - begin_value_pos);
61  return true;
62}
63
64template <typename STR>
65void SplitStringUsingSubstrT(const STR& str,
66                                    const STR& s,
67                                    std::vector<STR>* r) {
68  r->clear();
69  typename STR::size_type begin_index = 0;
70  while (true) {
71    const typename STR::size_type end_index = str.find(s, begin_index);
72    if (end_index == STR::npos) {
73      const STR term = str.substr(begin_index);
74      STR tmp;
75      TrimWhitespace(term, TRIM_ALL, &tmp);
76      r->push_back(tmp);
77      return;
78    }
79    const STR term = str.substr(begin_index, end_index - begin_index);
80    STR tmp;
81    TrimWhitespace(term, TRIM_ALL, &tmp);
82    r->push_back(tmp);
83    begin_index = end_index + s.size();
84  }
85}
86
// Returns true when |c| is one of the characters HTML 5 defines as
// whitespace: space, tab, LF, line tab, FF, or CR.
template <typename CHAR>
bool IsHtml5WhitespaceT(CHAR c) {
  switch (c) {
    case L' ':
    case L'\t':
    case L'\xA':
    case L'\xB':
    case L'\xC':
    case L'\xD':
      return true;

    default:  // Not a space character.
      return false;
  }
}

// Splits |str| into its maximal runs of non-whitespace characters and stores
// them, in order, in |result| (cleared first). Leading, trailing and repeated
// whitespace never produces empty entries; an empty or all-whitespace |str|
// yields an empty |result|.
template <typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  result->clear();
  const size_t length = str.length();

  size_t pos = 0;
  while (pos < length) {
    // Step over the whitespace that precedes the next token.
    while (pos < length && IsHtml5WhitespaceT(str[pos]))
      ++pos;
    if (pos == length)
      break;  // Only trailing whitespace was left.

    // Consume the token itself.
    const size_t token_start = pos;
    while (pos < length && !IsHtml5WhitespaceT(str[pos]))
      ++pos;
    result->push_back(str.substr(token_start, pos - token_start));
  }
}
127
128}  // namespace
129
130void SplitString(const string16& str,
131                 char16 c,
132                 std::vector<string16>* r) {
133  DCHECK(CBU16_IS_SINGLE(c));
134  SplitStringT(str, c, true, r);
135}
136
137void SplitString(const std::string& str,
138                 char c,
139                 std::vector<std::string>* r) {
140#if CHAR_MIN < 0
141  DCHECK(c >= 0);
142#endif
143  DCHECK(c < 0x7F);
144  SplitStringT(str, c, true, r);
145}
146
147bool SplitStringIntoKeyValuePairs(const std::string& line,
148                                  char key_value_delimiter,
149                                  char key_value_pair_delimiter,
150                                  StringPairs* key_value_pairs) {
151  key_value_pairs->clear();
152
153  std::vector<std::string> pairs;
154  SplitString(line, key_value_pair_delimiter, &pairs);
155
156  bool success = true;
157  for (size_t i = 0; i < pairs.size(); ++i) {
158    // Don't add empty pairs into the result.
159    if (pairs[i].empty())
160      continue;
161
162    std::string key;
163    std::string value;
164    if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
165      // Don't return here, to allow for pairs without associated
166      // value or key; just record that the split failed.
167      success = false;
168    }
169    key_value_pairs->push_back(make_pair(key, value));
170  }
171  return success;
172}
173
174void SplitStringUsingSubstr(const string16& str,
175                            const string16& s,
176                            std::vector<string16>* r) {
177  SplitStringUsingSubstrT(str, s, r);
178}
179
180void SplitStringUsingSubstr(const std::string& str,
181                            const std::string& s,
182                            std::vector<std::string>* r) {
183  SplitStringUsingSubstrT(str, s, r);
184}
185
186void SplitStringDontTrim(const string16& str,
187                         char16 c,
188                         std::vector<string16>* r) {
189  DCHECK(CBU16_IS_SINGLE(c));
190  SplitStringT(str, c, false, r);
191}
192
193void SplitStringDontTrim(const std::string& str,
194                         char c,
195                         std::vector<std::string>* r) {
196  DCHECK(IsStringUTF8(str));
197#if CHAR_MIN < 0
198  DCHECK(c >= 0);
199#endif
200  DCHECK(c < 0x7F);
201  SplitStringT(str, c, false, r);
202}
203
204void SplitStringAlongWhitespace(const string16& str,
205                                std::vector<string16>* result) {
206  SplitStringAlongWhitespaceT(str, result);
207}
208
209void SplitStringAlongWhitespace(const std::string& str,
210                                std::vector<std::string>* result) {
211  SplitStringAlongWhitespaceT(str, result);
212}
213
214}  // namespace base
215