string_util.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Use of this source code is governed by a BSD-style license that can be
3231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// found in the LICENSE file.
4231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
5231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/string_util.h"
6231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
7231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "build/build_config.h"
8231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
9231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <ctype.h>
10231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <errno.h>
11231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <math.h>
12231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <stdarg.h>
13231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <stdio.h>
14231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <stdlib.h>
15231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <string.h>
16231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <time.h>
17231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <wchar.h>
18231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <wctype.h>
19231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
20231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <algorithm>
21231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include <vector>
22231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
23231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/basictypes.h"
24231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/logging.h"
25231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/singleton.h"
26231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/third_party/dmg_fp/dmg_fp.h"
27231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/utf_string_conversion_utils.h"
28231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/third_party/icu/icu_utf.h"
29231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
30231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blocknamespace {
31231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
32231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Force the singleton used by Empty[W]String[16] to be a unique type. This
33231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// prevents other code that might accidentally use Singleton<string> from
34231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// getting our internal one.
35231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstruct EmptyStrings {
36231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  EmptyStrings() {}
37231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  const std::string s;
38231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  const std::wstring ws;
39231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  const string16 s16;
40231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
41231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
// Bookkeeping record for ReplaceStringPlaceholders: remembers where in the
// string a given replaced parameter starts.
struct ReplacementOffset {
  // Which parameter this record refers to (its index).
  uintptr_t parameter;

  // Position in the string at which the replacement begins.
  size_t offset;

  ReplacementOffset(uintptr_t parameter, size_t offset)
      : parameter(parameter), offset(offset) {
  }
};
55231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
56231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic bool CompareParameter(const ReplacementOffset& elem1,
57231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                             const ReplacementOffset& elem2) {
58231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return elem1.parameter < elem2.parameter;
59231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
60231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
// Generic string-to-number conversion, parameterized on a traits class.
//
// StringToNumberTraits must supply:
//  - string_type: typedef for the STL string type accepted as input.
//  - value_type: typedef for the numeric type produced.
//  - convert_func: static function wrapping a strtol-like routine; it takes
//    the character buffer and an end pointer, and returns a value_type.
//  - valid_func: static function returning whether |input| is in acceptable
//    form.  It exists to reject input that convert_func tolerates but that
//    must make StringToNumber return false.  For strtol-like functions,
//    valid_func should check for leading whitespace.
//
// |*output| is always written with whatever convert_func returned, even when
// the function reports failure.
template<typename StringToNumberTraits>
bool StringToNumber(const typename StringToNumberTraits::string_type& input,
                    typename StringToNumberTraits::value_type* output) {
  typedef StringToNumberTraits traits;
  typedef typename traits::string_type::value_type char_type;

  errno = 0;  // Thread-safe?  It is on at least Mac, Linux, and Windows.
  const char_type* const begin = input.c_str();
  char_type* parse_end = NULL;
  *output = traits::convert_func(begin, &parse_end);

  // ERANGE (or any other errno) means the value overflowed or underflowed.
  if (errno != 0)
    return false;

  // An empty string holds nothing to parse.
  if (input.empty())
    return false;

  // The parse must stop exactly at the end of the string.  Measuring the end
  // from the string's stated length (rather than its first NUL) also rejects
  // trailing garbage, unparseable input, and embedded NUL characters.
  if (parse_end != begin + input.length())
    return false;

  // Finally, let the traits veto input that is not in preferred form.
  return traits::valid_func(input);
}
98231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
99231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic int strtoi(const char *nptr, char **endptr, int base) {
100231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  long res = strtol(nptr, endptr, base);
101231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if __LP64__
102231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  // Long is 64-bits, we have to handle under/overflow ourselves.
103231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  if (res > kint32max) {
104231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    res = kint32max;
105231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    errno = ERANGE;
106231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  } else if (res < kint32min) {
107231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    res = kint32min;
108231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    errno = ERANGE;
109231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
110231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
111231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return static_cast<int>(res);
112231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
113231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
114231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic unsigned int strtoui(const char *nptr, char **endptr, int base) {
115231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  unsigned long res = strtoul(nptr, endptr, base);
116231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if __LP64__
117231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  // Long is 64-bits, we have to handle under/overflow ourselves.  Test to see
118231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  // if the result can fit into 32-bits (as signed or unsigned).
119231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  if (static_cast<int>(static_cast<long>(res)) != static_cast<long>(res) &&
120231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      static_cast<unsigned int>(res) != res) {
121231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    res = kuint32max;
122231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    errno = ERANGE;
123231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
124231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
125231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return static_cast<unsigned int>(res);
126231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
127231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
128231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToIntTraits {
129231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
130231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef std::string string_type;
131231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int value_type;
132231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 10;
133231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
134231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
135231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return strtoi(str, endptr, kBase);
136231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
137231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
138231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !isspace(str[0]);
139231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
140231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
141231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
142231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToIntTraits {
143231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
144231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef string16 string_type;
145231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int value_type;
146231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 10;
147231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
148231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
149231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if defined(WCHAR_T_IS_UTF16)
150231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return wcstol(str, endptr, kBase);
151231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#elif defined(WCHAR_T_IS_UTF32)
152231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    std::string ascii_string = UTF16ToASCII(string16(str));
153231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    char* ascii_end = NULL;
154231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    value_type ret = strtoi(ascii_string.c_str(), &ascii_end, kBase);
155231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
156231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      *endptr =
157231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block          const_cast<string_type::value_type*>(str) + ascii_string.length();
158231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    }
159231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return ret;
160231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
161231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
162231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
163231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !iswspace(str[0]);
164231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
165231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
166231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
167231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToInt64Traits {
168231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
169231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef std::string string_type;
170231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int64 value_type;
171231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 10;
172231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
173231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
174231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#ifdef OS_WIN
175231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return _strtoi64(str, endptr, kBase);
176231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#else  // assume OS_POSIX
177231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return strtoll(str, endptr, kBase);
178231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
179231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
180231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
181231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !isspace(str[0]);
182231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
183231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
184231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
185231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToInt64Traits {
186231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
187231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef string16 string_type;
188231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int64 value_type;
189231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 10;
190231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
191231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
192231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#ifdef OS_WIN
193231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return _wcstoi64(str, endptr, kBase);
194231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#else  // assume OS_POSIX
195231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    std::string ascii_string = UTF16ToASCII(string16(str));
196231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    char* ascii_end = NULL;
197231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase);
198231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
199231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      *endptr =
200231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block          const_cast<string_type::value_type*>(str) + ascii_string.length();
201231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    }
202231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return ret;
203231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
204231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
205231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
206231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !iswspace(str[0]);
207231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
208231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
209231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
210231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// For the HexString variants, use the unsigned variants like strtoul for
211231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// convert_func so that input like "0x80000000" doesn't result in an overflow.
212231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
213231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass HexStringToIntTraits {
214231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
215231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef std::string string_type;
216231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int value_type;
217231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 16;
218231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
219231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
220231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return strtoui(str, endptr, kBase);
221231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
222231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
223231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !isspace(str[0]);
224231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
225231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
226231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
227231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass HexString16ToIntTraits {
228231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
229231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef string16 string_type;
230231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef int value_type;
231231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static const int kBase = 16;
232231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
23321939df44de1705786c545cd1bf519d47250322dBen Murdoch                                        string_type::value_type** endptr) {
234231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if defined(WCHAR_T_IS_UTF16)
23521939df44de1705786c545cd1bf519d47250322dBen Murdoch    return wcstoul(str, endptr, kBase);
236231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#elif defined(WCHAR_T_IS_UTF32)
23721939df44de1705786c545cd1bf519d47250322dBen Murdoch    std::string ascii_string = UTF16ToASCII(string16(str));
238231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    char* ascii_end = NULL;
23921939df44de1705786c545cd1bf519d47250322dBen Murdoch    value_type ret = strtoui(ascii_string.c_str(), &ascii_end, kBase);
240231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
241231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      *endptr =
242231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block          const_cast<string_type::value_type*>(str) + ascii_string.length();
243231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    }
244231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return ret;
245231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif
246231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
247231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
248231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !iswspace(str[0]);
249231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
250231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
251231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
252231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToDoubleTraits {
253231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
254231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef std::string string_type;
255231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef double value_type;
256231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
257231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
258231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return dmg_fp::strtod(str, endptr);
259231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
260231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
261231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !isspace(str[0]);
262231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
263231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
264231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
265231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToDoubleTraits {
266231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public:
267231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef string16 string_type;
268231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  typedef double value_type;
269231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline value_type convert_func(const string_type::value_type* str,
270231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block                                        string_type::value_type** endptr) {
271231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    // Because dmg_fp::strtod does not like char16, we convert it to ASCII.
272231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    // In theory, this should be safe, but it's possible that 16-bit chars
273231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    // might get ignored by accident causing something to be parsed when it
274231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    // shouldn't.
275231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    std::string ascii_string = UTF16ToASCII(string16(str));
276231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    char* ascii_end = NULL;
277231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    value_type ret = dmg_fp::strtod(ascii_string.c_str(), &ascii_end);
278231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    if (ascii_string.c_str() + ascii_string.length() == ascii_end) {
279231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      // Put endptr at end of input string, so it's not recognized as an error.
280231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block      *endptr =
281231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block          const_cast<string_type::value_type*>(str) + ascii_string.length();
282231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    }
283231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
284231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return ret;
285231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
286231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  static inline bool valid_func(const string_type& str) {
287231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block    return !str.empty() && !iswspace(str[0]);
288231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  }
289231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block};
290231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
291231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}  // namespace
292231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
293231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
294231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blocknamespace base {
295231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
// Scans a wprintf-style |format| string and reports whether every conversion
// specification in it is considered portable.  Returns false as soon as one
// of these is found inside a specification:
//  - 's' or 'c' without a preceding 'l' modifier,
//  - any of 'S', 'C', 'F', 'D', 'O', 'U'.
// Returns true otherwise, including when the string ends in the middle of a
// specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that terminate a conversion specification.
  static const wchar_t kConversions[] = L"diouxXeEfgGaAcspn%";
  // Characters that are rejected regardless of modifiers.
  static const wchar_t kNeverPortable[] = L"SCFDOU";

  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // Inside a specification: consume characters up to a known specifier.
    bool saw_l = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // The format string ended mid-specification.  No unportable
        // specification was seen, and the string is equally broken on all
        // platforms, so call it portable.
        return true;
      }

      if (ch == L'l') {
        // Only 'l' can rescue a following 's' or 'c'.
        saw_l = true;
      } else if (wcschr(kNeverPortable, ch) != NULL) {
        return false;
      } else if ((ch == L's' || ch == L'c') && !saw_l) {
        return false;
      }

      if (wcschr(kConversions, ch) != NULL) {
        // Specification complete and portable; resume the outer scan.
        break;
      }
    }
    ++cursor;
  }

  return true;
}
330231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
331231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
332231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}  // namespace base
333231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
334231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
335231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst std::string& EmptyString() {
336231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return Singleton<EmptyStrings>::get()->s;
337231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
338231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
339231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst std::wstring& EmptyWString() {
340231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return Singleton<EmptyStrings>::get()->ws;
341231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
342231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
343231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst string16& EmptyString16() {
344231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block  return Singleton<EmptyStrings>::get()->s16;
345231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}
346231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block
// Comma-separated list of the code points this file treats as whitespace,
// ending in a 0 terminator.  It is meant to be expanded inside an array
// initializer (see kWhitespaceWide and kWhitespaceUTF16).
// NOTE(review): U+200C (Zero Width Non-Joiner) is a format character rather
// than a member of Unicode's White_Space property -- confirm that including
// it here is intentional.
#define WHITESPACE_UNICODE \
  0x0009, /* <control-0009> to <control-000D> */ \
  0x000A,                                        \
  0x000B,                                        \
  0x000C,                                        \
  0x000D,                                        \
  0x0020, /* Space */                            \
  0x0085, /* <control-0085> */                   \
  0x00A0, /* No-Break Space */                   \
  0x1680, /* Ogham Space Mark */                 \
  0x180E, /* Mongolian Vowel Separator */        \
  0x2000, /* En Quad to Hair Space */            \
  0x2001,                                        \
  0x2002,                                        \
  0x2003,                                        \
  0x2004,                                        \
  0x2005,                                        \
  0x2006,                                        \
  0x2007,                                        \
  0x2008,                                        \
  0x2009,                                        \
  0x200A,                                        \
  0x200C, /* Zero Width Non-Joiner */            \
  0x2028, /* Line Separator */                   \
  0x2029, /* Paragraph Separator */              \
  0x202F, /* Narrow No-Break Space */            \
  0x205F, /* Medium Mathematical Space */        \
  0x3000, /* Ideographic Space */                \
  0
376
377const wchar_t kWhitespaceWide[] = {
378  WHITESPACE_UNICODE
379};
380const char16 kWhitespaceUTF16[] = {
381  WHITESPACE_UNICODE
382};
// ASCII whitespace: the control characters 0x09 through 0x0D plus the space
// character, followed by a terminating NUL.
const char kWhitespaceASCII[] = {
  '\t',    // 0x09
  '\n',    // 0x0A
  '\v',    // 0x0B
  '\f',    // 0x0C
  '\r',    // 0x0D
  ' ',     // 0x20, Space
  '\0'
};
392
// The bytes EF BB BF (the UTF-8 byte order mark) as a NUL-terminated string.
const char kUtf8ByteOrderMark[] =
    "\xEF\xBB\xBF";
394
// Copies |input| into |output| and then deletes every character that occurs
// in the NUL-terminated set |remove_chars|.  Returns true if at least one
// character was deleted.
template<typename STR>
bool RemoveCharsT(const STR& input,
                  const typename STR::value_type remove_chars[],
                  STR* output) {
  *output = input;

  bool any_removed = false;
  // After an erase the next candidate sits at the same index, so the search
  // resumes from |pos| rather than |pos| + 1.
  for (size_t pos = output->find_first_of(remove_chars);
       pos != STR::npos;
       pos = output->find_first_of(remove_chars, pos)) {
    output->erase(pos, 1);
    any_removed = true;
  }

  return any_removed;
}
413
414bool RemoveChars(const std::wstring& input,
415                 const wchar_t remove_chars[],
416                 std::wstring* output) {
417  return RemoveCharsT(input, remove_chars, output);
418}
419
420#if !defined(WCHAR_T_IS_UTF16)
421bool RemoveChars(const string16& input,
422                 const char16 remove_chars[],
423                 string16* output) {
424  return RemoveCharsT(input, remove_chars, output);
425}
426#endif
427
428bool RemoveChars(const std::string& input,
429                 const char remove_chars[],
430                 std::string* output) {
431  return RemoveCharsT(input, remove_chars, output);
432}
433
434template<typename STR>
435TrimPositions TrimStringT(const STR& input,
436                          const typename STR::value_type trim_chars[],
437                          TrimPositions positions,
438                          STR* output) {
439  // Find the edges of leading/trailing whitespace as desired.
440  const typename STR::size_type last_char = input.length() - 1;
441  const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
442      input.find_first_not_of(trim_chars) : 0;
443  const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
444      input.find_last_not_of(trim_chars) : last_char;
445
446  // When the string was all whitespace, report that we stripped off whitespace
447  // from whichever position the caller was interested in.  For empty input, we
448  // stripped no whitespace, but we still need to clear |output|.
449  if (input.empty() ||
450      (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
451    bool input_was_empty = input.empty();  // in case output == &input
452    output->clear();
453    return input_was_empty ? TRIM_NONE : positions;
454  }
455
456  // Trim the whitespace.
457  *output =
458      input.substr(first_good_char, last_good_char - first_good_char + 1);
459
460  // Return where we trimmed from.
461  return static_cast<TrimPositions>(
462      ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
463      ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
464}
465
466bool TrimString(const std::wstring& input,
467                const wchar_t trim_chars[],
468                std::wstring* output) {
469  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
470}
471
472#if !defined(WCHAR_T_IS_UTF16)
473bool TrimString(const string16& input,
474                const char16 trim_chars[],
475                string16* output) {
476  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
477}
478#endif
479
480bool TrimString(const std::string& input,
481                const char trim_chars[],
482                std::string* output) {
483  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
484}
485
486void TruncateUTF8ToByteSize(const std::string& input,
487                            const size_t byte_size,
488                            std::string* output) {
489  DCHECK(output);
490  if (byte_size > input.length()) {
491    *output = input;
492    return;
493  }
494  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
495  // Note: This cast is necessary because CBU8_NEXT uses int32s.
496  int32 truncation_length = static_cast<int32>(byte_size);
497  int32 char_index = truncation_length - 1;
498  const char* data = input.data();
499
500  // Using CBU8, we will move backwards from the truncation point
501  // to the beginning of the string looking for a valid UTF8
502  // character.  Once a full UTF8 character is found, we will
503  // truncate the string to the end of that character.
504  while (char_index >= 0) {
505    int32 prev = char_index;
506    uint32 code_point = 0;
507    CBU8_NEXT(data, char_index, truncation_length, code_point);
508    if (!base::IsValidCharacter(code_point) ||
509        !base::IsValidCodepoint(code_point)) {
510      char_index = prev - 1;
511    } else {
512      break;
513    }
514  }
515
516  if (char_index >= 0 )
517    *output = input.substr(0, char_index);
518  else
519    output->clear();
520}
521
522TrimPositions TrimWhitespace(const std::wstring& input,
523                             TrimPositions positions,
524                             std::wstring* output) {
525  return TrimStringT(input, kWhitespaceWide, positions, output);
526}
527
528#if !defined(WCHAR_T_IS_UTF16)
529TrimPositions TrimWhitespace(const string16& input,
530                             TrimPositions positions,
531                             string16* output) {
532  return TrimStringT(input, kWhitespaceUTF16, positions, output);
533}
534#endif
535
536TrimPositions TrimWhitespaceASCII(const std::string& input,
537                                  TrimPositions positions,
538                                  std::string* output) {
539  return TrimStringT(input, kWhitespaceASCII, positions, output);
540}
541
542// This function is only for backward-compatibility.
543// To be removed when all callers are updated.
544TrimPositions TrimWhitespace(const std::string& input,
545                             TrimPositions positions,
546                             std::string* output) {
547  return TrimWhitespaceASCII(input, positions, output);
548}
549
// Returns a copy of |text| with leading and trailing whitespace removed and
// every interior whitespace run reduced to a single space.  When
// |trim_sequences_with_line_breaks| is true, an interior run containing CR or
// LF is removed entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  STR collapsed;
  collapsed.reserve(text.size());

  // Start out as if we were already inside a trimmed whitespace run, so that
  // leading whitespace produces no output.
  bool in_whitespace = true;
  bool already_trimmed = true;

  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
    if (!IsWhitespace(*i)) {
      // Non-whitespace characters are copied straight across.
      in_whitespace = false;
      already_trimmed = false;
      collapsed.push_back(*i);
      continue;
    }

    if (!in_whitespace) {
      // First character of a run: emit the single replacement space.
      in_whitespace = true;
      collapsed.push_back(L' ');
    }
    if (trim_sequences_with_line_breaks && !already_trimmed &&
        ((*i == '\n') || (*i == '\r'))) {
      // The run contains CR or LF: take back the space emitted for it.
      already_trimmed = true;
      collapsed.erase(collapsed.size() - 1);
    }
  }

  if (in_whitespace && !already_trimmed) {
    // The text ended inside a run; drop the space emitted for it.
    collapsed.erase(collapsed.size() - 1);
  }

  return collapsed;
}
591
592std::wstring CollapseWhitespace(const std::wstring& text,
593                                bool trim_sequences_with_line_breaks) {
594  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
595}
596
597#if !defined(WCHAR_T_IS_UTF16)
598string16 CollapseWhitespace(const string16& text,
599                            bool trim_sequences_with_line_breaks) {
600  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
601}
602#endif
603
604std::string CollapseWhitespaceASCII(const std::string& text,
605                                    bool trim_sequences_with_line_breaks) {
606  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
607}
608
609bool ContainsOnlyWhitespaceASCII(const std::string& str) {
610  for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
611    if (!IsAsciiWhitespace(*i))
612      return false;
613  }
614  return true;
615}
616
617bool ContainsOnlyWhitespace(const string16& str) {
618  for (string16::const_iterator i(str.begin()); i != str.end(); ++i) {
619    if (!IsWhitespace(*i))
620      return false;
621  }
622  return true;
623}
624
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // The input qualifies exactly when no position holds a character from
  // outside the allowed set.
  return input.find_first_not_of(characters) == STR::npos;
}
634
635bool ContainsOnlyChars(const std::wstring& input,
636                       const std::wstring& characters) {
637  return ContainsOnlyCharsT(input, characters);
638}
639
640#if !defined(WCHAR_T_IS_UTF16)
641bool ContainsOnlyChars(const string16& input, const string16& characters) {
642  return ContainsOnlyCharsT(input, characters);
643}
644#endif
645
646bool ContainsOnlyChars(const std::string& input,
647                       const std::string& characters) {
648  return ContainsOnlyCharsT(input, characters);
649}
650
651std::string WideToASCII(const std::wstring& wide) {
652  DCHECK(IsStringASCII(wide)) << wide;
653  return std::string(wide.begin(), wide.end());
654}
655
656std::wstring ASCIIToWide(const base::StringPiece& ascii) {
657  DCHECK(IsStringASCII(ascii)) << ascii;
658  return std::wstring(ascii.begin(), ascii.end());
659}
660
661std::string UTF16ToASCII(const string16& utf16) {
662  DCHECK(IsStringASCII(utf16)) << utf16;
663  return std::string(utf16.begin(), utf16.end());
664}
665
666string16 ASCIIToUTF16(const base::StringPiece& ascii) {
667  DCHECK(IsStringASCII(ascii)) << ascii;
668  return string16(ascii.begin(), ascii.end());
669}
670
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| to Latin-1 and stores the result in |latin1|.  Returns true
// on success.  If any character lies outside the range [0, 255], returns
// false and leaves |latin1| empty.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // Compare as an unsigned value: where wchar_t is a signed type, a
    // negative element would otherwise pass a plain "> 255" test and be
    // silently truncated to a char.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  // Publish the result only after every character has been validated.
  latin1->swap(output);
  return true;
}
684
// Returns true if every character of |str| lies in the range [0, 255].  An
// empty string yields true.
bool IsString8Bit(const std::wstring& str) {
  for (size_t i = 0; i < str.length(); i++) {
    // Compare as an unsigned value: where wchar_t is a signed type, a
    // negative element would otherwise pass a plain "> 255" test.
    if (static_cast<unsigned long>(str[i]) > 255)
      return false;
  }
  return true;
}
692
693template<class STR>
694static bool DoIsStringASCII(const STR& str) {
695  for (size_t i = 0; i < str.length(); i++) {
696    typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
697    if (c > 0x7F)
698      return false;
699  }
700  return true;
701}
702
703bool IsStringASCII(const std::wstring& str) {
704  return DoIsStringASCII(str);
705}
706
707#if !defined(WCHAR_T_IS_UTF16)
708bool IsStringASCII(const string16& str) {
709  return DoIsStringASCII(str);
710}
711#endif
712
713bool IsStringASCII(const base::StringPiece& str) {
714  return DoIsStringASCII(str);
715}
716
717bool IsStringUTF8(const std::string& str) {
718  const char *src = str.data();
719  int32 src_len = static_cast<int32>(str.length());
720  int32 char_index = 0;
721
722  while (char_index < src_len) {
723    int32 code_point;
724    CBU8_NEXT(src, char_index, src_len, code_point);
725    if (!base::IsValidCharacter(code_point))
726       return false;
727  }
728  return true;
729}
730
// Compares the range [a_begin, a_end), passed through ToLowerASCII, with the
// NUL-terminated string |b|.  Returns true only if every character matches
// and both sequences end together.
template<typename Iter>
static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
                                          Iter a_end,
                                          const char* b) {
  Iter it = a_begin;
  while (it != a_end) {
    // |b| ran out before the range did.
    if (*b == 0)
      return false;
    if (ToLowerASCII(*it) != *b)
      return false;
    ++it;
    ++b;
  }
  // The range is exhausted; |b| must be exhausted too.
  return !*b;
}
741
742// Front-ends for LowerCaseEqualsASCII.
743bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
744  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
745}
746
747bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
748  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
749}
750
751#if !defined(WCHAR_T_IS_UTF16)
752bool LowerCaseEqualsASCII(const string16& a, const char* b) {
753  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
754}
755#endif
756
757bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
758                          std::string::const_iterator a_end,
759                          const char* b) {
760  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
761}
762
763bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
764                          std::wstring::const_iterator a_end,
765                          const char* b) {
766  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
767}
768
769#if !defined(WCHAR_T_IS_UTF16)
770bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
771                          string16::const_iterator a_end,
772                          const char* b) {
773  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
774}
775#endif
776
777bool LowerCaseEqualsASCII(const char* a_begin,
778                          const char* a_end,
779                          const char* b) {
780  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
781}
782
783bool LowerCaseEqualsASCII(const wchar_t* a_begin,
784                          const wchar_t* a_end,
785                          const char* b) {
786  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
787}
788
789#if !defined(WCHAR_T_IS_UTF16)
790bool LowerCaseEqualsASCII(const char16* a_begin,
791                          const char16* a_end,
792                          const char* b) {
793  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
794}
795#endif
796
797bool EqualsASCII(const string16& a, const base::StringPiece& b) {
798  if (a.length() != b.length())
799    return false;
800  return std::equal(b.begin(), b.end(), a.begin());
801}
802
803bool StartsWithASCII(const std::string& str,
804                     const std::string& search,
805                     bool case_sensitive) {
806  if (case_sensitive)
807    return str.compare(0, search.length(), search) == 0;
808  else
809    return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
810}
811
812template <typename STR>
813bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
814  if (case_sensitive) {
815    return str.compare(0, search.length(), search) == 0;
816  } else {
817    if (search.size() > str.size())
818      return false;
819    return std::equal(search.begin(), search.end(), str.begin(),
820                      CaseInsensitiveCompare<typename STR::value_type>());
821  }
822}
823
824bool StartsWith(const std::wstring& str, const std::wstring& search,
825                bool case_sensitive) {
826  return StartsWithT(str, search, case_sensitive);
827}
828
829#if !defined(WCHAR_T_IS_UTF16)
830bool StartsWith(const string16& str, const string16& search,
831                bool case_sensitive) {
832  return StartsWithT(str, search, case_sensitive);
833}
834#endif
835
836template <typename STR>
837bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
838  typename STR::size_type str_length = str.length();
839  typename STR::size_type search_length = search.length();
840  if (search_length > str_length)
841    return false;
842  if (case_sensitive) {
843    return str.compare(str_length - search_length, search_length, search) == 0;
844  } else {
845    return std::equal(search.begin(), search.end(),
846                      str.begin() + (str_length - search_length),
847                      CaseInsensitiveCompare<typename STR::value_type>());
848  }
849}
850
851bool EndsWith(const std::string& str, const std::string& search,
852              bool case_sensitive) {
853  return EndsWithT(str, search, case_sensitive);
854}
855
856bool EndsWith(const std::wstring& str, const std::wstring& search,
857              bool case_sensitive) {
858  return EndsWithT(str, search, case_sensitive);
859}
860
861#if !defined(WCHAR_T_IS_UTF16)
862bool EndsWith(const string16& str, const string16& search,
863              bool case_sensitive) {
864  return EndsWithT(str, search, case_sensitive);
865}
866#endif
867
868DataUnits GetByteDisplayUnits(int64 bytes) {
869  // The byte thresholds at which we display amounts.  A byte count is displayed
870  // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
871  // This must match the DataUnits enum.
872  static const int64 kUnitThresholds[] = {
873    0,              // DATA_UNITS_BYTE,
874    3*1024,         // DATA_UNITS_KIBIBYTE,
875    2*1024*1024,    // DATA_UNITS_MEBIBYTE,
876    1024*1024*1024  // DATA_UNITS_GIBIBYTE,
877  };
878
879  if (bytes < 0) {
880    NOTREACHED() << "Negative bytes value";
881    return DATA_UNITS_BYTE;
882  }
883
884  int unit_index = arraysize(kUnitThresholds);
885  while (--unit_index > 0) {
886    if (bytes >= kUnitThresholds[unit_index])
887      break;
888  }
889
890  DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE);
891  return DataUnits(unit_index);
892}
893
// TODO(mpcomplete): deal with locale
// Unit suffixes for byte counts and for transfer speeds.  Both tables are
// indexed by DataUnits value, so their order must match the DataUnits enum.
static const wchar_t* const kByteStrings[] = { L"B", L"kB", L"MB", L"GB" };

static const wchar_t* const kSpeedStrings[] = {
  L"B/s", L"kB/s", L"MB/s", L"GB/s"
};
909
910std::wstring FormatBytesInternal(int64 bytes,
911                                 DataUnits units,
912                                 bool show_units,
913                                 const wchar_t* const* suffix) {
914  if (bytes < 0) {
915    NOTREACHED() << "Negative bytes value";
916    return std::wstring();
917  }
918
919  DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE);
920
921  // Put the quantity in the right units.
922  double unit_amount = static_cast<double>(bytes);
923  for (int i = 0; i < units; ++i)
924    unit_amount /= 1024.0;
925
926  wchar_t buf[64];
927  if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100)
928    base::swprintf(buf, arraysize(buf), L"%.1lf", unit_amount);
929  else
930    base::swprintf(buf, arraysize(buf), L"%.0lf", unit_amount);
931
932  std::wstring ret(buf);
933  if (show_units) {
934    ret += L" ";
935    ret += suffix[units];
936  }
937
938  return ret;
939}
940
941std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) {
942  return FormatBytesInternal(bytes, units, show_units, kByteStrings);
943}
944
945std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) {
946  return FormatBytesInternal(bytes, units, show_units, kSpeedStrings);
947}
948
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|.  Only the first occurrence is replaced unless
// |replace_all| is true.  Does nothing if |start_offset| is npos or not less
// than the string length, or if |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty pattern matches at every offset, so with |replace_all| the loop
  // below would never terminate.  Bail out explicitly rather than rely on the
  // DCHECK alone, in case that check is compiled out of this build.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume after the inserted text so it is never rescanned.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
968
969void ReplaceFirstSubstringAfterOffset(string16* str,
970                                      string16::size_type start_offset,
971                                      const string16& find_this,
972                                      const string16& replace_with) {
973  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
974                                 false);  // replace first instance
975}
976
977void ReplaceFirstSubstringAfterOffset(std::string* str,
978                                      std::string::size_type start_offset,
979                                      const std::string& find_this,
980                                      const std::string& replace_with) {
981  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
982                                 false);  // replace first instance
983}
984
985void ReplaceSubstringsAfterOffset(string16* str,
986                                  string16::size_type start_offset,
987                                  const string16& find_this,
988                                  const string16& replace_with) {
989  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
990                                 true);  // replace all instances
991}
992
993void ReplaceSubstringsAfterOffset(std::string* str,
994                                  std::string::size_type start_offset,
995                                  const std::string& find_this,
996                                  const std::string& replace_with) {
997  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
998                                 true);  // replace all instances
999}
1000
1001// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter
1002// is the size of the buffer. These return the number of characters in the
1003// formatted string excluding the NUL terminator. If the buffer is not
1004// large enough to accommodate the formatted string without truncation, they
1005// return the number of characters that would be in the fully-formatted string
1006// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms).
1007inline int vsnprintfT(char* buffer,
1008                      size_t buf_size,
1009                      const char* format,
1010                      va_list argptr) {
1011  return base::vsnprintf(buffer, buf_size, format, argptr);
1012}
1013
1014inline int vsnprintfT(wchar_t* buffer,
1015                      size_t buf_size,
1016                      const wchar_t* format,
1017                      va_list argptr) {
1018  return base::vswprintf(buffer, buf_size, format, argptr);
1019}
1020
1021// Templatized backend for StringPrintF/StringAppendF. This does not finalize
1022// the va_list, the caller is expected to do that.
1023template <class StringType>
1024static void StringAppendVT(StringType* dst,
1025                           const typename StringType::value_type* format,
1026                           va_list ap) {
1027  // First try with a small fixed size buffer.
1028  // This buffer size should be kept in sync with StringUtilTest.GrowBoundary
1029  // and StringUtilTest.StringPrintfBounds.
1030  typename StringType::value_type stack_buf[1024];
1031
1032  va_list ap_copy;
1033  GG_VA_COPY(ap_copy, ap);
1034
1035#if !defined(OS_WIN)
1036  errno = 0;
1037#endif
1038  int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy);
1039  va_end(ap_copy);
1040
1041  if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) {
1042    // It fit.
1043    dst->append(stack_buf, result);
1044    return;
1045  }
1046
1047  // Repeatedly increase buffer size until it fits.
1048  int mem_length = arraysize(stack_buf);
1049  while (true) {
1050    if (result < 0) {
1051#if !defined(OS_WIN)
1052      // On Windows, vsnprintfT always returns the number of characters in a
1053      // fully-formatted string, so if we reach this point, something else is
1054      // wrong and no amount of buffer-doubling is going to fix it.
1055      if (errno != 0 && errno != EOVERFLOW)
1056#endif
1057      {
1058        // If an error other than overflow occurred, it's never going to work.
1059        DLOG(WARNING) << "Unable to printf the requested string due to error.";
1060        return;
1061      }
1062      // Try doubling the buffer size.
1063      mem_length *= 2;
1064    } else {
1065      // We need exactly "result + 1" characters.
1066      mem_length = result + 1;
1067    }
1068
1069    if (mem_length > 32 * 1024 * 1024) {
1070      // That should be plenty, don't try anything larger.  This protects
1071      // against huge allocations when using vsnprintfT implementations that
1072      // return -1 for reasons other than overflow without setting errno.
1073      DLOG(WARNING) << "Unable to printf the requested string due to size.";
1074      return;
1075    }
1076
1077    std::vector<typename StringType::value_type> mem_buf(mem_length);
1078
1079    // NOTE: You can only use a va_list once.  Since we're in a while loop, we
1080    // need to make a new copy each time so we don't use up the original.
1081    GG_VA_COPY(ap_copy, ap);
1082    result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy);
1083    va_end(ap_copy);
1084
1085    if ((result >= 0) && (result < mem_length)) {
1086      // It fit.
1087      dst->append(&mem_buf[0], result);
1088      return;
1089    }
1090  }
1091}
1092
1093namespace {
1094
1095template <typename STR, typename INT, typename UINT, bool NEG>
1096struct IntToStringT {
1097  // This is to avoid a compiler warning about unary minus on unsigned type.
1098  // For example, say you had the following code:
1099  //   template <typename INT>
1100  //   INT abs(INT value) { return value < 0 ? -value : value; }
1101  // Even though if INT is unsigned, it's impossible for value < 0, so the
1102  // unary minus will never be taken, the compiler will still generate a
1103  // warning.  We do a little specialization dance...
1104  template <typename INT2, typename UINT2, bool NEG2>
1105  struct ToUnsignedT { };
1106
1107  template <typename INT2, typename UINT2>
1108  struct ToUnsignedT<INT2, UINT2, false> {
1109    static UINT2 ToUnsigned(INT2 value) {
1110      return static_cast<UINT2>(value);
1111    }
1112  };
1113
1114  template <typename INT2, typename UINT2>
1115  struct ToUnsignedT<INT2, UINT2, true> {
1116    static UINT2 ToUnsigned(INT2 value) {
1117      return static_cast<UINT2>(value < 0 ? -value : value);
1118    }
1119  };
1120
1121  // This set of templates is very similar to the above templates, but
1122  // for testing whether an integer is negative.
1123  template <typename INT2, bool NEG2>
1124  struct TestNegT { };
1125  template <typename INT2>
1126  struct TestNegT<INT2, false> {
1127    static bool TestNeg(INT2 value) {
1128      // value is unsigned, and can never be negative.
1129      return false;
1130    }
1131  };
1132  template <typename INT2>
1133  struct TestNegT<INT2, true> {
1134    static bool TestNeg(INT2 value) {
1135      return value < 0;
1136    }
1137  };
1138
1139  static STR IntToString(INT value) {
1140    // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
1141    // So round up to allocate 3 output characters per byte, plus 1 for '-'.
1142    const int kOutputBufSize = 3 * sizeof(INT) + 1;
1143
1144    // Allocate the whole string right away, we will right back to front, and
1145    // then return the substr of what we ended up using.
1146    STR outbuf(kOutputBufSize, 0);
1147
1148    bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
1149    // Even though is_neg will never be true when INT is parameterized as
1150    // unsigned, even the presence of the unary operation causes a warning.
1151    UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
1152
1153    for (typename STR::iterator it = outbuf.end();;) {
1154      --it;
1155      DCHECK(it != outbuf.begin());
1156      *it = static_cast<typename STR::value_type>((res % 10) + '0');
1157      res /= 10;
1158
1159      // We're done..
1160      if (res == 0) {
1161        if (is_neg) {
1162          --it;
1163          DCHECK(it != outbuf.begin());
1164          *it = static_cast<typename STR::value_type>('-');
1165        }
1166        return STR(it, outbuf.end());
1167      }
1168    }
1169    NOTREACHED();
1170    return STR();
1171  }
1172};
1173
1174}
1175
1176std::string IntToString(int value) {
1177  return IntToStringT<std::string, int, unsigned int, true>::
1178      IntToString(value);
1179}
1180std::wstring IntToWString(int value) {
1181  return IntToStringT<std::wstring, int, unsigned int, true>::
1182      IntToString(value);
1183}
1184string16 IntToString16(int value) {
1185  return IntToStringT<string16, int, unsigned int, true>::
1186      IntToString(value);
1187}
1188std::string UintToString(unsigned int value) {
1189  return IntToStringT<std::string, unsigned int, unsigned int, false>::
1190      IntToString(value);
1191}
1192std::wstring UintToWString(unsigned int value) {
1193  return IntToStringT<std::wstring, unsigned int, unsigned int, false>::
1194      IntToString(value);
1195}
1196string16 UintToString16(unsigned int value) {
1197  return IntToStringT<string16, unsigned int, unsigned int, false>::
1198      IntToString(value);
1199}
1200std::string Int64ToString(int64 value) {
1201  return IntToStringT<std::string, int64, uint64, true>::
1202      IntToString(value);
1203}
1204std::wstring Int64ToWString(int64 value) {
1205  return IntToStringT<std::wstring, int64, uint64, true>::
1206      IntToString(value);
1207}
1208std::string Uint64ToString(uint64 value) {
1209  return IntToStringT<std::string, uint64, uint64, false>::
1210      IntToString(value);
1211}
1212std::wstring Uint64ToWString(uint64 value) {
1213  return IntToStringT<std::wstring, uint64, uint64, false>::
1214      IntToString(value);
1215}
1216
1217std::string DoubleToString(double value) {
1218  // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
1219  char buffer[32];
1220  dmg_fp::g_fmt(buffer, value);
1221  return std::string(buffer);
1222}
1223
1224std::wstring DoubleToWString(double value) {
1225  return ASCIIToWide(DoubleToString(value));
1226}
1227
1228void StringAppendV(std::string* dst, const char* format, va_list ap) {
1229  StringAppendVT(dst, format, ap);
1230}
1231
1232void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) {
1233  StringAppendVT(dst, format, ap);
1234}
1235
1236std::string StringPrintf(const char* format, ...) {
1237  va_list ap;
1238  va_start(ap, format);
1239  std::string result;
1240  StringAppendV(&result, format, ap);
1241  va_end(ap);
1242  return result;
1243}
1244
1245std::wstring StringPrintf(const wchar_t* format, ...) {
1246  va_list ap;
1247  va_start(ap, format);
1248  std::wstring result;
1249  StringAppendV(&result, format, ap);
1250  va_end(ap);
1251  return result;
1252}
1253
1254std::string StringPrintV(const char* format, va_list ap) {
1255  std::string result;
1256  StringAppendV(&result, format, ap);
1257  return result;
1258}
1259
1260const std::string& SStringPrintf(std::string* dst, const char* format, ...) {
1261  va_list ap;
1262  va_start(ap, format);
1263  dst->clear();
1264  StringAppendV(dst, format, ap);
1265  va_end(ap);
1266  return *dst;
1267}
1268
1269const std::wstring& SStringPrintf(std::wstring* dst,
1270                                  const wchar_t* format, ...) {
1271  va_list ap;
1272  va_start(ap, format);
1273  dst->clear();
1274  StringAppendV(dst, format, ap);
1275  va_end(ap);
1276  return *dst;
1277}
1278
1279void StringAppendF(std::string* dst, const char* format, ...) {
1280  va_list ap;
1281  va_start(ap, format);
1282  StringAppendV(dst, format, ap);
1283  va_end(ap);
1284}
1285
1286void StringAppendF(std::wstring* dst, const wchar_t* format, ...) {
1287  va_list ap;
1288  va_start(ap, format);
1289  StringAppendV(dst, format, ap);
1290  va_end(ap);
1291}
1292
1293template<typename STR>
1294static void SplitStringT(const STR& str,
1295                         const typename STR::value_type s,
1296                         bool trim_whitespace,
1297                         std::vector<STR>* r) {
1298  size_t last = 0;
1299  size_t i;
1300  size_t c = str.size();
1301  for (i = 0; i <= c; ++i) {
1302    if (i == c || str[i] == s) {
1303      size_t len = i - last;
1304      STR tmp = str.substr(last, len);
1305      if (trim_whitespace) {
1306        STR t_tmp;
1307        TrimWhitespace(tmp, TRIM_ALL, &t_tmp);
1308        r->push_back(t_tmp);
1309      } else {
1310        r->push_back(tmp);
1311      }
1312      last = i + 1;
1313    }
1314  }
1315}
1316
1317void SplitString(const std::wstring& str,
1318                 wchar_t s,
1319                 std::vector<std::wstring>* r) {
1320  SplitStringT(str, s, true, r);
1321}
1322
1323#if !defined(WCHAR_T_IS_UTF16)
1324void SplitString(const string16& str,
1325                 char16 s,
1326                 std::vector<string16>* r) {
1327  SplitStringT(str, s, true, r);
1328}
1329#endif
1330
1331void SplitString(const std::string& str,
1332                 char s,
1333                 std::vector<std::string>* r) {
1334  SplitStringT(str, s, true, r);
1335}
1336
1337void SplitStringDontTrim(const std::wstring& str,
1338                         wchar_t s,
1339                         std::vector<std::wstring>* r) {
1340  SplitStringT(str, s, false, r);
1341}
1342
1343#if !defined(WCHAR_T_IS_UTF16)
1344void SplitStringDontTrim(const string16& str,
1345                         char16 s,
1346                         std::vector<string16>* r) {
1347  SplitStringT(str, s, false, r);
1348}
1349#endif
1350
1351void SplitStringDontTrim(const std::string& str,
1352                         char s,
1353                         std::vector<std::string>* r) {
1354  SplitStringT(str, s, false, r);
1355}
1356
1357template <typename STR>
1358static void SplitStringUsingSubstrT(const STR& str,
1359                                    const STR& s,
1360                                    std::vector<STR>* r) {
1361  typename STR::size_type begin_index = 0;
1362  while (true) {
1363    const typename STR::size_type end_index = str.find(s, begin_index);
1364    if (end_index == STR::npos) {
1365      const STR term = str.substr(begin_index);
1366      STR tmp;
1367      TrimWhitespace(term, TRIM_ALL, &tmp);
1368      r->push_back(tmp);
1369      return;
1370    }
1371    const STR term = str.substr(begin_index, end_index - begin_index);
1372    STR tmp;
1373    TrimWhitespace(term, TRIM_ALL, &tmp);
1374    r->push_back(tmp);
1375    begin_index = end_index + s.size();
1376  }
1377}
1378
1379void SplitStringUsingSubstr(const string16& str,
1380                            const string16& s,
1381                            std::vector<string16>* r) {
1382  SplitStringUsingSubstrT(str, s, r);
1383}
1384
1385void SplitStringUsingSubstr(const std::string& str,
1386                            const std::string& s,
1387                            std::vector<std::string>* r) {
1388  SplitStringUsingSubstrT(str, s, r);
1389}
1390
// Clears |*tokens| and fills it with every maximal run of characters in |str|
// that contains none of the characters in |delimiters|. Runs of adjacent
// delimiters therefore never yield empty tokens. Returns the token count.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type size_type;

  tokens->clear();

  size_type token_begin = str.find_first_not_of(delimiters);
  while (token_begin != STR::npos) {
    const size_type token_end = str.find_first_of(delimiters, token_begin + 1);
    const bool is_last_token = (token_end == STR::npos);

    tokens->push_back(is_last_token
        ? str.substr(token_begin)
        : str.substr(token_begin, token_end - token_begin));

    if (is_last_token)
      break;
    // Skip over the delimiter run that follows the token just stored.
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
1411
1412size_t Tokenize(const std::wstring& str,
1413                const std::wstring& delimiters,
1414                std::vector<std::wstring>* tokens) {
1415  return TokenizeT(str, delimiters, tokens);
1416}
1417
1418#if !defined(WCHAR_T_IS_UTF16)
1419size_t Tokenize(const string16& str,
1420                const string16& delimiters,
1421                std::vector<string16>* tokens) {
1422  return TokenizeT(str, delimiters, tokens);
1423}
1424#endif
1425
1426size_t Tokenize(const std::string& str,
1427                const std::string& delimiters,
1428                std::vector<std::string>* tokens) {
1429  return TokenizeT(str, delimiters, tokens);
1430}
1431
1432size_t Tokenize(const base::StringPiece& str,
1433                const base::StringPiece& delimiters,
1434                std::vector<base::StringPiece>* tokens) {
1435  return TokenizeT(str, delimiters, tokens);
1436}
1437
// Concatenates the elements of |parts|, inserting the single character |sep|
// between consecutive elements. An empty |parts| yields an empty string.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts,
                       typename STR::value_type sep) {
  if (parts.empty())
    return STR();

  STR joined(parts.front());
  for (size_t index = 1; index < parts.size(); ++index) {
    joined += sep;
    joined += parts[index];
  }
  return joined;
}
1454
1455std::string JoinString(const std::vector<std::string>& parts, char sep) {
1456  return JoinStringT(parts, sep);
1457}
1458
1459#if !defined(WCHAR_T_IS_UTF16)
1460string16 JoinString(const std::vector<string16>& parts, char16 sep) {
1461  return JoinStringT(parts, sep);
1462}
1463#endif
1464
1465std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t sep) {
1466  return JoinStringT(parts, sep);
1467}
1468
// Appends to |*result| every maximal run of non-whitespace characters found
// in |str|. |*result| is not cleared first, and leading, trailing or repeated
// whitespace never produces empty entries.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  const size_t length = str.length();
  if (length == 0)
    return;

  bool in_whitespace = false;  // Was the previous character whitespace?
  size_t word_begin = 0;       // Start of the word currently being scanned.
  for (size_t pos = 0; pos < length; ++pos) {
    bool is_whitespace;
    switch (str[pos]) {
      // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
      case L' ':
      case L'\t':
      case L'\xA':
      case L'\xB':
      case L'\xC':
      case L'\xD':
        is_whitespace = true;
        break;
      default:
        is_whitespace = false;
        break;
    }

    if (is_whitespace) {
      if (in_whitespace)
        continue;
      in_whitespace = true;
      // A word just ended, unless the string itself starts with whitespace.
      if (pos > 0)
        result->push_back(str.substr(word_begin, pos - word_begin));
    } else if (in_whitespace) {
      // First character of a new word.
      in_whitespace = false;
      word_begin = pos;
    }
  }

  // Flush the final word when the string does not end in whitespace.
  if (!in_whitespace)
    result->push_back(str.substr(word_begin, length - word_begin));
}
1508
1509void SplitStringAlongWhitespace(const std::wstring& str,
1510                                std::vector<std::wstring>* result) {
1511  SplitStringAlongWhitespaceT(str, result);
1512}
1513
1514#if !defined(WCHAR_T_IS_UTF16)
1515void SplitStringAlongWhitespace(const string16& str,
1516                                std::vector<string16>* result) {
1517  SplitStringAlongWhitespaceT(str, result);
1518}
1519#endif
1520
1521void SplitStringAlongWhitespace(const std::string& str,
1522                                std::vector<std::string>* result) {
1523  SplitStringAlongWhitespaceT(str, result);
1524}
1525
1526template<class FormatStringType, class OutStringType>
1527OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
1528    const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
1529  size_t substitutions = subst.size();
1530  DCHECK(substitutions < 10);
1531
1532  size_t sub_length = 0;
1533  for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
1534       iter != subst.end(); ++iter) {
1535    sub_length += (*iter).length();
1536  }
1537
1538  OutStringType formatted;
1539  formatted.reserve(format_string.length() + sub_length);
1540
1541  std::vector<ReplacementOffset> r_offsets;
1542  for (typename FormatStringType::const_iterator i = format_string.begin();
1543       i != format_string.end(); ++i) {
1544    if ('$' == *i) {
1545      if (i + 1 != format_string.end()) {
1546        ++i;
1547        DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
1548        if ('$' == *i) {
1549          formatted.push_back('$');
1550        } else {
1551          uintptr_t index = *i - '1';
1552          if (offsets) {
1553            ReplacementOffset r_offset(index,
1554                static_cast<int>(formatted.size()));
1555            r_offsets.insert(std::lower_bound(r_offsets.begin(),
1556                r_offsets.end(), r_offset,
1557                &CompareParameter),
1558                r_offset);
1559          }
1560          if (index < substitutions)
1561            formatted.append(subst.at(index));
1562        }
1563      }
1564    } else {
1565      formatted.push_back(*i);
1566    }
1567  }
1568  if (offsets) {
1569    for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
1570        i != r_offsets.end(); ++i) {
1571      offsets->push_back(i->offset);
1572    }
1573  }
1574  return formatted;
1575}
1576
1577string16 ReplaceStringPlaceholders(const string16& format_string,
1578                                   const std::vector<string16>& subst,
1579                                   std::vector<size_t>* offsets) {
1580  return DoReplaceStringPlaceholders(format_string, subst, offsets);
1581}
1582
1583std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
1584                                      const std::vector<std::string>& subst,
1585                                      std::vector<size_t>* offsets) {
1586  return DoReplaceStringPlaceholders(format_string, subst, offsets);
1587}
1588
1589string16 ReplaceStringPlaceholders(const string16& format_string,
1590                                   const string16& a,
1591                                   size_t* offset) {
1592  std::vector<size_t> offsets;
1593  std::vector<string16> subst;
1594  subst.push_back(a);
1595  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
1596
1597  DCHECK(offsets.size() == 1);
1598  if (offset) {
1599    *offset = offsets[0];
1600  }
1601  return result;
1602}
1603
// Returns true when |character| is one of the pattern wildcards '*' or '?'.
template <class CHAR>
static bool IsWildcard(CHAR character) {
  switch (character) {
    case '*':
    case '?':
      return true;
    default:
      return false;
  }
}
1608
// Advances |*pattern| and |*string| in lock-step for as long as they hold the
// same literal characters, stopping at the first difference, at an unescaped
// wildcard in the pattern, or when either one is exhausted. A backslash in
// the pattern escapes the character that follows it, so an escaped wildcard
// or backslash is compared literally.
template <class CHAR>
static void EatSameChars(const CHAR** pattern, const CHAR** string) {
  while (**pattern && **string) {
    // Unescaped wildcards are not consumed here; the caller handles them.
    if (IsWildcard(**pattern))
      return;

    // Step over an escape character and remember that we did so.
    bool escaped = false;
    if (**pattern == L'\\') {
      escaped = true;
      ++(*pattern);
      // The pattern ended right after the escape character.
      if (!**pattern)
        return;
    }

    if (**pattern != **string) {
      // No match. If an escape character was skipped, step back onto it so
      // the caller sees the pattern exactly where the mismatch started; the
      // match as a whole will then fail, because an escaped character that
      // does not match here cannot match anywhere else.
      if (escaped)
        --(*pattern);
      return;
    }

    ++(*pattern);
    ++(*string);
  }
}
1646
// Advances |*pattern| past any run of consecutive wildcard characters,
// leaving it on the first non-wildcard character or on the terminator.
template <class CHAR>
static void EatWildcard(const CHAR** pattern) {
  while (**pattern && IsWildcard(**pattern))
    ++(*pattern);
}
1655
// Recursive matcher behind MatchPatternWide/MatchPatternASCII. Returns true
// when the NUL-terminated |eval| matches |pattern|, where '?' matches zero or
// one character and '*' matches any run of characters. |depth| counts the
// recursion level; once it exceeds kMaxDepth the match is reported as failed.
template <class CHAR>
static bool MatchPatternT(const CHAR* eval, const CHAR* pattern, int depth) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix shared by the string and the pattern.
  EatSameChars(&pattern, &eval);

  // String exhausted: what is left of the pattern may only be wildcards.
  if (*eval == 0) {
    EatWildcard(&pattern);
    return *pattern == 0;
  }

  // Pattern exhausted while the string still has characters: no match.
  if (*pattern == 0)
    return false;

  const CHAR* const rest = pattern + 1;
  const int next_depth = depth + 1;

  // '?': try matching the rest against the current string (zero characters
  // consumed), then against the string with one character consumed.
  if (pattern[0] == '?') {
    return MatchPatternT(eval, rest, next_depth) ||
           MatchPatternT(eval + 1, rest, next_depth);
  }

  // Anything other than '*' at this point is a literal mismatch.
  if (pattern[0] != '*')
    return false;

  // '*': try the rest of the pattern against every suffix of the string.
  for (; *eval; ++eval) {
    if (MatchPatternT(eval, rest, next_depth))
      return true;
  }

  // The whole string was consumed by the '*'; the match succeeds only if
  // nothing but wildcards remains in the pattern.
  EatWildcard(&pattern);
  return *pattern == 0;
}
1707
1708bool MatchPatternWide(const std::wstring& eval, const std::wstring& pattern) {
1709  return MatchPatternT(eval.c_str(), pattern.c_str(), 0);
1710}
1711
1712bool MatchPatternASCII(const std::string& eval, const std::string& pattern) {
1713  DCHECK(IsStringASCII(eval) && IsStringASCII(pattern));
1714  return MatchPatternT(eval.c_str(), pattern.c_str(), 0);
1715}
1716
1717bool StringToInt(const std::string& input, int* output) {
1718  return StringToNumber<StringToIntTraits>(input, output);
1719}
1720
1721bool StringToInt(const string16& input, int* output) {
1722  return StringToNumber<String16ToIntTraits>(input, output);
1723}
1724
1725bool StringToInt64(const std::string& input, int64* output) {
1726  return StringToNumber<StringToInt64Traits>(input, output);
1727}
1728
1729bool StringToInt64(const string16& input, int64* output) {
1730  return StringToNumber<String16ToInt64Traits>(input, output);
1731}
1732
1733bool HexStringToInt(const std::string& input, int* output) {
1734  return StringToNumber<HexStringToIntTraits>(input, output);
1735}
1736
1737bool HexStringToInt(const string16& input, int* output) {
1738  return StringToNumber<HexString16ToIntTraits>(input, output);
1739}
1740
1741namespace {
1742
1743template<class CHAR>
1744bool HexDigitToIntT(const CHAR digit, uint8* val) {
1745  if (digit >= '0' && digit <= '9')
1746    *val = digit - '0';
1747  else if (digit >= 'a' && digit <= 'f')
1748    *val = 10 + digit - 'a';
1749  else if (digit >= 'A' && digit <= 'F')
1750    *val = 10 + digit - 'A';
1751  else
1752    return false;
1753  return true;
1754}
1755
1756template<typename STR>
1757bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
1758  DCHECK(output->size() == 0);
1759  size_t count = input.size();
1760  if (count == 0 || (count % 2) != 0)
1761    return false;
1762  for (uintptr_t i = 0; i < count / 2; ++i) {
1763    uint8 msb = 0;  // most significant 4 bits
1764    uint8 lsb = 0;  // least significant 4 bits
1765    if (!HexDigitToIntT(input[i * 2], &msb) ||
1766        !HexDigitToIntT(input[i * 2 + 1], &lsb))
1767      return false;
1768    output->push_back((msb << 4) | lsb);
1769  }
1770  return true;
1771}
1772
1773}  // namespace
1774
1775bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
1776  return HexStringToBytesT(input, output);
1777}
1778
1779bool HexStringToBytes(const string16& input, std::vector<uint8>* output) {
1780  return HexStringToBytesT(input, output);
1781}
1782
1783int StringToInt(const std::string& value) {
1784  int result;
1785  StringToInt(value, &result);
1786  return result;
1787}
1788
1789int StringToInt(const string16& value) {
1790  int result;
1791  StringToInt(value, &result);
1792  return result;
1793}
1794
1795int64 StringToInt64(const std::string& value) {
1796  int64 result;
1797  StringToInt64(value, &result);
1798  return result;
1799}
1800
1801int64 StringToInt64(const string16& value) {
1802  int64 result;
1803  StringToInt64(value, &result);
1804  return result;
1805}
1806
1807int HexStringToInt(const std::string& value) {
1808  int result;
1809  HexStringToInt(value, &result);
1810  return result;
1811}
1812
1813int HexStringToInt(const string16& value) {
1814  int result;
1815  HexStringToInt(value, &result);
1816  return result;
1817}
1818
1819bool StringToDouble(const std::string& input, double* output) {
1820  return StringToNumber<StringToDoubleTraits>(input, output);
1821}
1822
1823bool StringToDouble(const string16& input, double* output) {
1824  return StringToNumber<String16ToDoubleTraits>(input, output);
1825}
1826
1827double StringToDouble(const std::string& value) {
1828  double result;
1829  StringToDouble(value, &result);
1830  return result;
1831}
1832
1833double StringToDouble(const string16& value) {
1834  double result;
1835  StringToDouble(value, &result);
1836  return result;
1837}
1838
1839// The following code is compatible with the OpenBSD lcpy interface.  See:
1840//   http://www.gratisoft.us/todd/papers/strlcpy.html
1841//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
1842
namespace {

// Copies |src| into |dst|, a buffer with room for |dst_size| characters,
// truncating if necessary. |dst| is always terminated when |dst_size| is
// non-zero. Returns the length of |src| in characters, excluding the
// terminator, so a return value >= |dst_size| means the copy was truncated.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR ch = src[copied];
    dst[copied] = ch;
    if (ch == 0)  // The terminator has been reached and copied.
      return copied;
    ++copied;
  }

  // The buffer filled up before the terminator was seen. The last character
  // written has to give way to the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Walk the remainder of |src| so that its full length can be returned.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}

}  // namespace
1862
1863size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1864  return lcpyT<char>(dst, src, dst_size);
1865}
1866size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1867  return lcpyT<wchar_t>(dst, src, dst_size);
1868}
1869
1870bool ElideString(const std::wstring& input, int max_len, std::wstring* output) {
1871  DCHECK(max_len >= 0);
1872  if (static_cast<int>(input.length()) <= max_len) {
1873    output->assign(input);
1874    return false;
1875  }
1876
1877  switch (max_len) {
1878    case 0:
1879      output->clear();
1880      break;
1881    case 1:
1882      output->assign(input.substr(0, 1));
1883      break;
1884    case 2:
1885      output->assign(input.substr(0, 2));
1886      break;
1887    case 3:
1888      output->assign(input.substr(0, 1) + L"." +
1889                     input.substr(input.length() - 1));
1890      break;
1891    case 4:
1892      output->assign(input.substr(0, 1) + L".." +
1893                     input.substr(input.length() - 1));
1894      break;
1895    default: {
1896      int rstr_len = (max_len - 3) / 2;
1897      int lstr_len = rstr_len + ((max_len - 3) % 2);
1898      output->assign(input.substr(0, lstr_len) + L"..." +
1899                     input.substr(input.length() - rstr_len));
1900      break;
1901    }
1902  }
1903
1904  return true;
1905}
1906
// Returns the upper-case hexadecimal encoding of the |size| bytes starting at
// |bytes|: two characters per input byte, high nibble first.
std::string HexEncode(const void* bytes, size_t size) {
  static const char kHexChars[] = "0123456789ABCDEF";

  const unsigned char* const data = static_cast<const unsigned char*>(bytes);

  // Each input byte creates two output hex characters.
  std::string encoded;
  encoded.reserve(size * 2);
  for (size_t index = 0; index < size; ++index) {
    const unsigned char byte = data[index];
    encoded.push_back(kHexChars[byte >> 4]);
    encoded.push_back(kHexChars[byte & 0x0f]);
  }
  return encoded;
}
1920