// string_util.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/string_util.h"

#include "build/build_config.h"

#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>

#include <algorithm>
#include <vector>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/singleton.h"
#include "base/third_party/dmg_fp/dmg_fp.h"
27231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/utf_string_conversion_utils.h" 28231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#include "base/third_party/icu/icu_utf.h" 29231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 30231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blocknamespace { 31231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 32231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Force the singleton used by Empty[W]String[16] to be a unique type. This 33231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// prevents other code that might accidentally use Singleton<string> from 34231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// getting our internal one. 35231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstruct EmptyStrings { 36231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block EmptyStrings() {} 37231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const std::string s; 38231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const std::wstring ws; 39231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const string16 s16; 40231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 41231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 42231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Used by ReplaceStringPlaceholders to track the position in the string of 43231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// replaced parameters. 44231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstruct ReplacementOffset { 45231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block ReplacementOffset(uintptr_t parameter, size_t offset) 46231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block : parameter(parameter), 47231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block offset(offset) {} 48231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 49231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Index of the parameter. 
50231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block uintptr_t parameter; 51231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 52231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Starting position in the string. 53231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block size_t offset; 54231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 55231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 56231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic bool CompareParameter(const ReplacementOffset& elem1, 57231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const ReplacementOffset& elem2) { 58231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return elem1.parameter < elem2.parameter; 59231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 60231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 61231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// Generalized string-to-number conversion. 62231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// 63231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// StringToNumberTraits should provide: 64231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// - a typedef for string_type, the STL string type used as input. 65231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// - a typedef for value_type, the target numeric type. 66231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// - a static function, convert_func, which dispatches to an appropriate 67231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// strtol-like function and returns type value_type. 68231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// - a static function, valid_func, which validates |input| and returns a bool 69231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// indicating whether it is in proper form. This is used to check for 70231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// conditions that convert_func tolerates but should result in 71231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// StringToNumber returning false. 
For strtol-like funtions, valid_func 72231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// should check for leading whitespace. 73231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blocktemplate<typename StringToNumberTraits> 74231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockbool StringToNumber(const typename StringToNumberTraits::string_type& input, 75231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typename StringToNumberTraits::value_type* output) { 76231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef StringToNumberTraits traits; 77cac0f67c402d107cdb10971b95719e2ff9c7c76bSteve Block 78231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block errno = 0; // Thread-safe? It is on at least Mac, Linux, and Windows. 79231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typename traits::string_type::value_type* endptr = NULL; 80231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typename traits::value_type value = traits::convert_func(input.c_str(), 81231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block &endptr); 82231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *output = value; 83231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 84231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Cases to return false: 85231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // - If errno is ERANGE, there was an overflow or underflow. 86231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // - If the input string is empty, there was nothing to parse. 87231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // - If endptr does not point to the end of the string, there are either 88231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // characters remaining in the string after a parsed number, or the string 89231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // does not begin with a parseable number. 
endptr is compared to the 90231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // expected end given the string's stated length to correctly catch cases 91231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // where the string contains embedded NUL characters. 92231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // - valid_func determines that the input is not in preferred form. 93231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return errno == 0 && 94231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block !input.empty() && 95231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block input.c_str() + input.length() == endptr && 96231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block traits::valid_func(input); 97231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 98231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 99231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic int strtoi(const char *nptr, char **endptr, int base) { 100231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block long res = strtol(nptr, endptr, base); 101231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if __LP64__ 102231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Long is 64-bits, we have to handle under/overflow ourselves. 
103231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (res > kint32max) { 104231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block res = kint32max; 105231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block errno = ERANGE; 106231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } else if (res < kint32min) { 107231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block res = kint32min; 108231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block errno = ERANGE; 109231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 110231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 111231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return static_cast<int>(res); 112231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 113231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 114231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockstatic unsigned int strtoui(const char *nptr, char **endptr, int base) { 115231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block unsigned long res = strtoul(nptr, endptr, base); 116231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if __LP64__ 117231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Long is 64-bits, we have to handle under/overflow ourselves. Test to see 118231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // if the result can fit into 32-bits (as signed or unsigned). 
119231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (static_cast<int>(static_cast<long>(res)) != static_cast<long>(res) && 120231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static_cast<unsigned int>(res) != res) { 121231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block res = kuint32max; 122231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block errno = ERANGE; 123231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 124231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 125231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return static_cast<unsigned int>(res); 126231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 127231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 128231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToIntTraits { 129231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 130231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef std::string string_type; 131231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int value_type; 132231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 10; 133231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 134231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 135231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return strtoi(str, endptr, kBase); 136231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 137231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 138231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !isspace(str[0]); 139231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 140231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 141231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 142231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToIntTraits { 143231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 
144231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef string16 string_type; 145231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int value_type; 146231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 10; 147231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 148231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 149231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if defined(WCHAR_T_IS_UTF16) 150231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return wcstol(str, endptr, kBase); 151231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#elif defined(WCHAR_T_IS_UTF32) 152231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block std::string ascii_string = UTF16ToASCII(string16(str)); 153231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block char* ascii_end = NULL; 154231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block value_type ret = strtoi(ascii_string.c_str(), &ascii_end, kBase); 155231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 156231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *endptr = 157231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const_cast<string_type::value_type*>(str) + ascii_string.length(); 158231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 159231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return ret; 160231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 161231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 162231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 163231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !iswspace(str[0]); 164231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 165231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 166231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 
167231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToInt64Traits { 168231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 169231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef std::string string_type; 170231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int64 value_type; 171231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 10; 172231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 173231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 174231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#ifdef OS_WIN 175231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return _strtoi64(str, endptr, kBase); 176231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#else // assume OS_POSIX 177231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return strtoll(str, endptr, kBase); 178231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 179231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 180231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 181231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !isspace(str[0]); 182231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 183231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 184231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 185231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToInt64Traits { 186231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 187231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef string16 string_type; 188231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int64 value_type; 189231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 10; 190231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 
191231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 192231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#ifdef OS_WIN 193231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return _wcstoi64(str, endptr, kBase); 194231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#else // assume OS_POSIX 195231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block std::string ascii_string = UTF16ToASCII(string16(str)); 196231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block char* ascii_end = NULL; 197231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block value_type ret = strtoll(ascii_string.c_str(), &ascii_end, kBase); 198231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 199231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *endptr = 200231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const_cast<string_type::value_type*>(str) + ascii_string.length(); 201231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 202231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return ret; 203231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 204231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 205231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 206231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !iswspace(str[0]); 207231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 208231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 209231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 210231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// For the HexString variants, use the unsigned variants like strtoul for 211231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block// convert_func so that input like "0x80000000" doesn't result in an overflow. 
212231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 213231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass HexStringToIntTraits { 214231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 215231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef std::string string_type; 216231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int value_type; 217231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 16; 218231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 219231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 220231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return strtoui(str, endptr, kBase); 221231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 222231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 223231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !isspace(str[0]); 224231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 225231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 226231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 227231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass HexString16ToIntTraits { 228231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 229231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef string16 string_type; 230231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef int value_type; 231231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static const int kBase = 16; 232231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 23321939df44de1705786c545cd1bf519d47250322dBen Murdoch string_type::value_type** endptr) { 234231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#if defined(WCHAR_T_IS_UTF16) 23521939df44de1705786c545cd1bf519d47250322dBen Murdoch return wcstoul(str, endptr, kBase); 
236231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#elif defined(WCHAR_T_IS_UTF32) 23721939df44de1705786c545cd1bf519d47250322dBen Murdoch std::string ascii_string = UTF16ToASCII(string16(str)); 238231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block char* ascii_end = NULL; 23921939df44de1705786c545cd1bf519d47250322dBen Murdoch value_type ret = strtoui(ascii_string.c_str(), &ascii_end, kBase); 240231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 241231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *endptr = 242231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const_cast<string_type::value_type*>(str) + ascii_string.length(); 243231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 244231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return ret; 245231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#endif 246231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 247231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 248231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !iswspace(str[0]); 249231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 250231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 251231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 252231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass StringToDoubleTraits { 253231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 254231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef std::string string_type; 255231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef double value_type; 256231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 257231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 258231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return dmg_fp::strtod(str, endptr); 259231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve 
Block } 260231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 261231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !isspace(str[0]); 262231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 263231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 264231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 265231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockclass String16ToDoubleTraits { 266231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block public: 267231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef string16 string_type; 268231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block typedef double value_type; 269231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline value_type convert_func(const string_type::value_type* str, 270231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block string_type::value_type** endptr) { 271231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Because dmg_fp::strtod does not like char16, we convert it to ASCII. 272231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // In theory, this should be safe, but it's possible that 16-bit chars 273231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // might get ignored by accident causing something to be parsed when it 274231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // shouldn't. 275231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block std::string ascii_string = UTF16ToASCII(string16(str)); 276231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block char* ascii_end = NULL; 277231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block value_type ret = dmg_fp::strtod(ascii_string.c_str(), &ascii_end); 278231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (ascii_string.c_str() + ascii_string.length() == ascii_end) { 279231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Put endptr at end of input string, so it's not recognized as an error. 
280231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *endptr = 281231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block const_cast<string_type::value_type*>(str) + ascii_string.length(); 282231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 283231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 284231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return ret; 285231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 286231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block static inline bool valid_func(const string_type& str) { 287231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return !str.empty() && !iswspace(str[0]); 288231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 289231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block}; 290231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 291231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} // namespace 292231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 293231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 294231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blocknamespace base { 295231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 296231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockbool IsWprintfFormatPortable(const wchar_t* format) { 297231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block for (const wchar_t* position = format; *position != '\0'; ++position) { 298231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (*position == '%') { 299231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block bool in_specification = true; 300231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block bool modifier_l = false; 301231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block while (in_specification) { 302231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Eat up characters until reaching a known specifier. 303231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (*++position == '\0') { 304231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // The format string ended in the middle of a specification. 
Call 305231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // it portable because no unportable specifications were found. The 306231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // string is equally broken on all platforms. 307231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return true; 308231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 309231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 310231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (*position == 'l') { 311231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // 'l' is the only thing that can save the 's' and 'c' specifiers. 312231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block modifier_l = true; 313231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } else if (((*position == 's' || *position == 'c') && !modifier_l) || 314231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *position == 'S' || *position == 'C' || *position == 'F' || 315231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block *position == 'D' || *position == 'O' || *position == 'U') { 316231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Not portable. 317231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return false; 318231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 319231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 320231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 321231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block // Portable, keep scanning the rest of the format string. 
322231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block in_specification = false; 323231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 324231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 325231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 326231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block } 327231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 328231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return true; 329231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 330231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 331231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 332231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} // namespace base 333231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 334231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 335231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst std::string& EmptyString() { 336231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return Singleton<EmptyStrings>::get()->s; 337231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 338231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 339231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst std::wstring& EmptyWString() { 340231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return Singleton<EmptyStrings>::get()->ws; 341231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 342231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 343231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Blockconst string16& EmptyString16() { 344231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block return Singleton<EmptyStrings>::get()->s16; 345231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block} 346231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 347231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block#define WHITESPACE_UNICODE \ 348231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x0009, /* <control-0009> to <control-000D> */ \ 349231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x000A, \ 350231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x000B, \ 
351231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x000C, \ 352231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x000D, \ 353231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x0020, /* Space */ \ 354231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x0085, /* <control-0085> */ \ 355231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x00A0, /* No-Break Space */ \ 356231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x1680, /* Ogham Space Mark */ \ 357231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x180E, /* Mongolian Vowel Separator */ \ 358231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2000, /* En Quad to Hair Space */ \ 359231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2001, \ 360231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2002, \ 361231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2003, \ 362231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2004, \ 363231d4e3152a9c27a73b6ac7badbe6be673aa3ddfSteve Block 0x2005, \ 364 0x2006, \ 365 0x2007, \ 366 0x2008, \ 367 0x2009, \ 368 0x200A, \ 369 0x200C, /* Zero Width Non-Joiner */ \ 370 0x2028, /* Line Separator */ \ 371 0x2029, /* Paragraph Separator */ \ 372 0x202F, /* Narrow No-Break Space */ \ 373 0x205F, /* Medium Mathematical Space */ \ 374 0x3000, /* Ideographic Space */ \ 375 0 376 377const wchar_t kWhitespaceWide[] = { 378 WHITESPACE_UNICODE 379}; 380const char16 kWhitespaceUTF16[] = { 381 WHITESPACE_UNICODE 382}; 383const char kWhitespaceASCII[] = { 384 0x09, // <control-0009> to <control-000D> 385 0x0A, 386 0x0B, 387 0x0C, 388 0x0D, 389 0x20, // Space 390 0 391}; 392 393const char kUtf8ByteOrderMark[] = "\xEF\xBB\xBF"; 394 395template<typename STR> 396bool RemoveCharsT(const STR& input, 397 const typename STR::value_type remove_chars[], 398 STR* output) { 399 bool removed = false; 400 size_t found; 401 402 *output = input; 403 404 found = output->find_first_of(remove_chars); 405 while (found != STR::npos) { 406 removed = true; 407 output->replace(found, 1, 
STR()); 408 found = output->find_first_of(remove_chars, found); 409 } 410 411 return removed; 412} 413 414bool RemoveChars(const std::wstring& input, 415 const wchar_t remove_chars[], 416 std::wstring* output) { 417 return RemoveCharsT(input, remove_chars, output); 418} 419 420#if !defined(WCHAR_T_IS_UTF16) 421bool RemoveChars(const string16& input, 422 const char16 remove_chars[], 423 string16* output) { 424 return RemoveCharsT(input, remove_chars, output); 425} 426#endif 427 428bool RemoveChars(const std::string& input, 429 const char remove_chars[], 430 std::string* output) { 431 return RemoveCharsT(input, remove_chars, output); 432} 433 434template<typename STR> 435TrimPositions TrimStringT(const STR& input, 436 const typename STR::value_type trim_chars[], 437 TrimPositions positions, 438 STR* output) { 439 // Find the edges of leading/trailing whitespace as desired. 440 const typename STR::size_type last_char = input.length() - 1; 441 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 442 input.find_first_not_of(trim_chars) : 0; 443 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 444 input.find_last_not_of(trim_chars) : last_char; 445 446 // When the string was all whitespace, report that we stripped off whitespace 447 // from whichever position the caller was interested in. For empty input, we 448 // stripped no whitespace, but we still need to clear |output|. 449 if (input.empty() || 450 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 451 bool input_was_empty = input.empty(); // in case output == &input 452 output->clear(); 453 return input_was_empty ? TRIM_NONE : positions; 454 } 455 456 // Trim the whitespace. 457 *output = 458 input.substr(first_good_char, last_good_char - first_good_char + 1); 459 460 // Return where we trimmed from. 461 return static_cast<TrimPositions>( 462 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 463 ((last_good_char == last_char) ? 
TRIM_NONE : TRIM_TRAILING)); 464} 465 466bool TrimString(const std::wstring& input, 467 const wchar_t trim_chars[], 468 std::wstring* output) { 469 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 470} 471 472#if !defined(WCHAR_T_IS_UTF16) 473bool TrimString(const string16& input, 474 const char16 trim_chars[], 475 string16* output) { 476 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 477} 478#endif 479 480bool TrimString(const std::string& input, 481 const char trim_chars[], 482 std::string* output) { 483 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 484} 485 486void TruncateUTF8ToByteSize(const std::string& input, 487 const size_t byte_size, 488 std::string* output) { 489 DCHECK(output); 490 if (byte_size > input.length()) { 491 *output = input; 492 return; 493 } 494 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 495 // Note: This cast is necessary because CBU8_NEXT uses int32s. 496 int32 truncation_length = static_cast<int32>(byte_size); 497 int32 char_index = truncation_length - 1; 498 const char* data = input.data(); 499 500 // Using CBU8, we will move backwards from the truncation point 501 // to the beginning of the string looking for a valid UTF8 502 // character. Once a full UTF8 character is found, we will 503 // truncate the string to the end of that character. 
504 while (char_index >= 0) { 505 int32 prev = char_index; 506 uint32 code_point = 0; 507 CBU8_NEXT(data, char_index, truncation_length, code_point); 508 if (!base::IsValidCharacter(code_point) || 509 !base::IsValidCodepoint(code_point)) { 510 char_index = prev - 1; 511 } else { 512 break; 513 } 514 } 515 516 if (char_index >= 0 ) 517 *output = input.substr(0, char_index); 518 else 519 output->clear(); 520} 521 522TrimPositions TrimWhitespace(const std::wstring& input, 523 TrimPositions positions, 524 std::wstring* output) { 525 return TrimStringT(input, kWhitespaceWide, positions, output); 526} 527 528#if !defined(WCHAR_T_IS_UTF16) 529TrimPositions TrimWhitespace(const string16& input, 530 TrimPositions positions, 531 string16* output) { 532 return TrimStringT(input, kWhitespaceUTF16, positions, output); 533} 534#endif 535 536TrimPositions TrimWhitespaceASCII(const std::string& input, 537 TrimPositions positions, 538 std::string* output) { 539 return TrimStringT(input, kWhitespaceASCII, positions, output); 540} 541 542// This function is only for backward-compatibility. 543// To be removed when all callers are updated. 544TrimPositions TrimWhitespace(const std::string& input, 545 TrimPositions positions, 546 std::string* output) { 547 return TrimWhitespaceASCII(input, positions, output); 548} 549 550template<typename STR> 551STR CollapseWhitespaceT(const STR& text, 552 bool trim_sequences_with_line_breaks) { 553 STR result; 554 result.resize(text.size()); 555 556 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 557 // will trim any leading whitespace. 558 bool in_whitespace = true; 559 bool already_trimmed = true; 560 561 int chars_written = 0; 562 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 563 if (IsWhitespace(*i)) { 564 if (!in_whitespace) { 565 // Reduce all whitespace sequences to a single space. 
566 in_whitespace = true; 567 result[chars_written++] = L' '; 568 } 569 if (trim_sequences_with_line_breaks && !already_trimmed && 570 ((*i == '\n') || (*i == '\r'))) { 571 // Whitespace sequences containing CR or LF are eliminated entirely. 572 already_trimmed = true; 573 --chars_written; 574 } 575 } else { 576 // Non-whitespace chracters are copied straight across. 577 in_whitespace = false; 578 already_trimmed = false; 579 result[chars_written++] = *i; 580 } 581 } 582 583 if (in_whitespace && !already_trimmed) { 584 // Any trailing whitespace is eliminated. 585 --chars_written; 586 } 587 588 result.resize(chars_written); 589 return result; 590} 591 592std::wstring CollapseWhitespace(const std::wstring& text, 593 bool trim_sequences_with_line_breaks) { 594 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 595} 596 597#if !defined(WCHAR_T_IS_UTF16) 598string16 CollapseWhitespace(const string16& text, 599 bool trim_sequences_with_line_breaks) { 600 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 601} 602#endif 603 604std::string CollapseWhitespaceASCII(const std::string& text, 605 bool trim_sequences_with_line_breaks) { 606 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 607} 608 609bool ContainsOnlyWhitespaceASCII(const std::string& str) { 610 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { 611 if (!IsAsciiWhitespace(*i)) 612 return false; 613 } 614 return true; 615} 616 617bool ContainsOnlyWhitespace(const string16& str) { 618 for (string16::const_iterator i(str.begin()); i != str.end(); ++i) { 619 if (!IsWhitespace(*i)) 620 return false; 621 } 622 return true; 623} 624 625template<typename STR> 626static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { 627 for (typename STR::const_iterator iter = input.begin(); 628 iter != input.end(); ++iter) { 629 if (characters.find(*iter) == STR::npos) 630 return false; 631 } 632 return true; 633} 634 635bool 
ContainsOnlyChars(const std::wstring& input, 636 const std::wstring& characters) { 637 return ContainsOnlyCharsT(input, characters); 638} 639 640#if !defined(WCHAR_T_IS_UTF16) 641bool ContainsOnlyChars(const string16& input, const string16& characters) { 642 return ContainsOnlyCharsT(input, characters); 643} 644#endif 645 646bool ContainsOnlyChars(const std::string& input, 647 const std::string& characters) { 648 return ContainsOnlyCharsT(input, characters); 649} 650 651std::string WideToASCII(const std::wstring& wide) { 652 DCHECK(IsStringASCII(wide)) << wide; 653 return std::string(wide.begin(), wide.end()); 654} 655 656std::wstring ASCIIToWide(const base::StringPiece& ascii) { 657 DCHECK(IsStringASCII(ascii)) << ascii; 658 return std::wstring(ascii.begin(), ascii.end()); 659} 660 661std::string UTF16ToASCII(const string16& utf16) { 662 DCHECK(IsStringASCII(utf16)) << utf16; 663 return std::string(utf16.begin(), utf16.end()); 664} 665 666string16 ASCIIToUTF16(const base::StringPiece& ascii) { 667 DCHECK(IsStringASCII(ascii)) << ascii; 668 return string16(ascii.begin(), ascii.end()); 669} 670 671// Latin1 is just the low range of Unicode, so we can copy directly to convert. 
672bool WideToLatin1(const std::wstring& wide, std::string* latin1) { 673 std::string output; 674 output.resize(wide.size()); 675 latin1->clear(); 676 for (size_t i = 0; i < wide.size(); i++) { 677 if (wide[i] > 255) 678 return false; 679 output[i] = static_cast<char>(wide[i]); 680 } 681 latin1->swap(output); 682 return true; 683} 684 685bool IsString8Bit(const std::wstring& str) { 686 for (size_t i = 0; i < str.length(); i++) { 687 if (str[i] > 255) 688 return false; 689 } 690 return true; 691} 692 693template<class STR> 694static bool DoIsStringASCII(const STR& str) { 695 for (size_t i = 0; i < str.length(); i++) { 696 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 697 if (c > 0x7F) 698 return false; 699 } 700 return true; 701} 702 703bool IsStringASCII(const std::wstring& str) { 704 return DoIsStringASCII(str); 705} 706 707#if !defined(WCHAR_T_IS_UTF16) 708bool IsStringASCII(const string16& str) { 709 return DoIsStringASCII(str); 710} 711#endif 712 713bool IsStringASCII(const base::StringPiece& str) { 714 return DoIsStringASCII(str); 715} 716 717bool IsStringUTF8(const std::string& str) { 718 const char *src = str.data(); 719 int32 src_len = static_cast<int32>(str.length()); 720 int32 char_index = 0; 721 722 while (char_index < src_len) { 723 int32 code_point; 724 CBU8_NEXT(src, char_index, src_len, code_point); 725 if (!base::IsValidCharacter(code_point)) 726 return false; 727 } 728 return true; 729} 730 731template<typename Iter> 732static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 733 Iter a_end, 734 const char* b) { 735 for (Iter it = a_begin; it != a_end; ++it, ++b) { 736 if (!*b || ToLowerASCII(*it) != *b) 737 return false; 738 } 739 return *b == 0; 740} 741 742// Front-ends for LowerCaseEqualsASCII. 
743bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 744 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 745} 746 747bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) { 748 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 749} 750 751#if !defined(WCHAR_T_IS_UTF16) 752bool LowerCaseEqualsASCII(const string16& a, const char* b) { 753 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 754} 755#endif 756 757bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 758 std::string::const_iterator a_end, 759 const char* b) { 760 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 761} 762 763bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, 764 std::wstring::const_iterator a_end, 765 const char* b) { 766 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 767} 768 769#if !defined(WCHAR_T_IS_UTF16) 770bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 771 string16::const_iterator a_end, 772 const char* b) { 773 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 774} 775#endif 776 777bool LowerCaseEqualsASCII(const char* a_begin, 778 const char* a_end, 779 const char* b) { 780 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 781} 782 783bool LowerCaseEqualsASCII(const wchar_t* a_begin, 784 const wchar_t* a_end, 785 const char* b) { 786 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 787} 788 789#if !defined(WCHAR_T_IS_UTF16) 790bool LowerCaseEqualsASCII(const char16* a_begin, 791 const char16* a_end, 792 const char* b) { 793 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 794} 795#endif 796 797bool EqualsASCII(const string16& a, const base::StringPiece& b) { 798 if (a.length() != b.length()) 799 return false; 800 return std::equal(b.begin(), b.end(), a.begin()); 801} 802 803bool StartsWithASCII(const std::string& str, 804 const std::string& search, 805 bool case_sensitive) { 806 if (case_sensitive) 807 return str.compare(0, search.length(), search) == 0; 808 else 809 return 
base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 810} 811 812template <typename STR> 813bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 814 if (case_sensitive) { 815 return str.compare(0, search.length(), search) == 0; 816 } else { 817 if (search.size() > str.size()) 818 return false; 819 return std::equal(search.begin(), search.end(), str.begin(), 820 CaseInsensitiveCompare<typename STR::value_type>()); 821 } 822} 823 824bool StartsWith(const std::wstring& str, const std::wstring& search, 825 bool case_sensitive) { 826 return StartsWithT(str, search, case_sensitive); 827} 828 829#if !defined(WCHAR_T_IS_UTF16) 830bool StartsWith(const string16& str, const string16& search, 831 bool case_sensitive) { 832 return StartsWithT(str, search, case_sensitive); 833} 834#endif 835 836template <typename STR> 837bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 838 typename STR::size_type str_length = str.length(); 839 typename STR::size_type search_length = search.length(); 840 if (search_length > str_length) 841 return false; 842 if (case_sensitive) { 843 return str.compare(str_length - search_length, search_length, search) == 0; 844 } else { 845 return std::equal(search.begin(), search.end(), 846 str.begin() + (str_length - search_length), 847 CaseInsensitiveCompare<typename STR::value_type>()); 848 } 849} 850 851bool EndsWith(const std::string& str, const std::string& search, 852 bool case_sensitive) { 853 return EndsWithT(str, search, case_sensitive); 854} 855 856bool EndsWith(const std::wstring& str, const std::wstring& search, 857 bool case_sensitive) { 858 return EndsWithT(str, search, case_sensitive); 859} 860 861#if !defined(WCHAR_T_IS_UTF16) 862bool EndsWith(const string16& str, const string16& search, 863 bool case_sensitive) { 864 return EndsWithT(str, search, case_sensitive); 865} 866#endif 867 868DataUnits GetByteDisplayUnits(int64 bytes) { 869 // The byte thresholds at which we display 
amounts. A byte count is displayed 870 // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1]. 871 // This must match the DataUnits enum. 872 static const int64 kUnitThresholds[] = { 873 0, // DATA_UNITS_BYTE, 874 3*1024, // DATA_UNITS_KIBIBYTE, 875 2*1024*1024, // DATA_UNITS_MEBIBYTE, 876 1024*1024*1024 // DATA_UNITS_GIBIBYTE, 877 }; 878 879 if (bytes < 0) { 880 NOTREACHED() << "Negative bytes value"; 881 return DATA_UNITS_BYTE; 882 } 883 884 int unit_index = arraysize(kUnitThresholds); 885 while (--unit_index > 0) { 886 if (bytes >= kUnitThresholds[unit_index]) 887 break; 888 } 889 890 DCHECK(unit_index >= DATA_UNITS_BYTE && unit_index <= DATA_UNITS_GIBIBYTE); 891 return DataUnits(unit_index); 892} 893 894// TODO(mpcomplete): deal with locale 895// Byte suffixes. This must match the DataUnits enum. 896static const wchar_t* const kByteStrings[] = { 897 L"B", 898 L"kB", 899 L"MB", 900 L"GB" 901}; 902 903static const wchar_t* const kSpeedStrings[] = { 904 L"B/s", 905 L"kB/s", 906 L"MB/s", 907 L"GB/s" 908}; 909 910std::wstring FormatBytesInternal(int64 bytes, 911 DataUnits units, 912 bool show_units, 913 const wchar_t* const* suffix) { 914 if (bytes < 0) { 915 NOTREACHED() << "Negative bytes value"; 916 return std::wstring(); 917 } 918 919 DCHECK(units >= DATA_UNITS_BYTE && units <= DATA_UNITS_GIBIBYTE); 920 921 // Put the quantity in the right units. 
922 double unit_amount = static_cast<double>(bytes); 923 for (int i = 0; i < units; ++i) 924 unit_amount /= 1024.0; 925 926 wchar_t buf[64]; 927 if (bytes != 0 && units != DATA_UNITS_BYTE && unit_amount < 100) 928 base::swprintf(buf, arraysize(buf), L"%.1lf", unit_amount); 929 else 930 base::swprintf(buf, arraysize(buf), L"%.0lf", unit_amount); 931 932 std::wstring ret(buf); 933 if (show_units) { 934 ret += L" "; 935 ret += suffix[units]; 936 } 937 938 return ret; 939} 940 941std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units) { 942 return FormatBytesInternal(bytes, units, show_units, kByteStrings); 943} 944 945std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units) { 946 return FormatBytesInternal(bytes, units, show_units, kSpeedStrings); 947} 948 949template<class StringType> 950void DoReplaceSubstringsAfterOffset(StringType* str, 951 typename StringType::size_type start_offset, 952 const StringType& find_this, 953 const StringType& replace_with, 954 bool replace_all) { 955 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 956 return; 957 958 DCHECK(!find_this.empty()); 959 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 960 offs != StringType::npos; offs = str->find(find_this, offs)) { 961 str->replace(offs, find_this.length(), replace_with); 962 offs += replace_with.length(); 963 964 if (!replace_all) 965 break; 966 } 967} 968 969void ReplaceFirstSubstringAfterOffset(string16* str, 970 string16::size_type start_offset, 971 const string16& find_this, 972 const string16& replace_with) { 973 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 974 false); // replace first instance 975} 976 977void ReplaceFirstSubstringAfterOffset(std::string* str, 978 std::string::size_type start_offset, 979 const std::string& find_this, 980 const std::string& replace_with) { 981 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 982 false); // 
replace first instance 983} 984 985void ReplaceSubstringsAfterOffset(string16* str, 986 string16::size_type start_offset, 987 const string16& find_this, 988 const string16& replace_with) { 989 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 990 true); // replace all instances 991} 992 993void ReplaceSubstringsAfterOffset(std::string* str, 994 std::string::size_type start_offset, 995 const std::string& find_this, 996 const std::string& replace_with) { 997 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 998 true); // replace all instances 999} 1000 1001// Overloaded wrappers around vsnprintf and vswprintf. The buf_size parameter 1002// is the size of the buffer. These return the number of characters in the 1003// formatted string excluding the NUL terminator. If the buffer is not 1004// large enough to accommodate the formatted string without truncation, they 1005// return the number of characters that would be in the fully-formatted string 1006// (vsnprintf, and vswprintf on Windows), or -1 (vswprintf on POSIX platforms). 1007inline int vsnprintfT(char* buffer, 1008 size_t buf_size, 1009 const char* format, 1010 va_list argptr) { 1011 return base::vsnprintf(buffer, buf_size, format, argptr); 1012} 1013 1014inline int vsnprintfT(wchar_t* buffer, 1015 size_t buf_size, 1016 const wchar_t* format, 1017 va_list argptr) { 1018 return base::vswprintf(buffer, buf_size, format, argptr); 1019} 1020 1021// Templatized backend for StringPrintF/StringAppendF. This does not finalize 1022// the va_list, the caller is expected to do that. 1023template <class StringType> 1024static void StringAppendVT(StringType* dst, 1025 const typename StringType::value_type* format, 1026 va_list ap) { 1027 // First try with a small fixed size buffer. 1028 // This buffer size should be kept in sync with StringUtilTest.GrowBoundary 1029 // and StringUtilTest.StringPrintfBounds. 
1030 typename StringType::value_type stack_buf[1024]; 1031 1032 va_list ap_copy; 1033 GG_VA_COPY(ap_copy, ap); 1034 1035#if !defined(OS_WIN) 1036 errno = 0; 1037#endif 1038 int result = vsnprintfT(stack_buf, arraysize(stack_buf), format, ap_copy); 1039 va_end(ap_copy); 1040 1041 if (result >= 0 && result < static_cast<int>(arraysize(stack_buf))) { 1042 // It fit. 1043 dst->append(stack_buf, result); 1044 return; 1045 } 1046 1047 // Repeatedly increase buffer size until it fits. 1048 int mem_length = arraysize(stack_buf); 1049 while (true) { 1050 if (result < 0) { 1051#if !defined(OS_WIN) 1052 // On Windows, vsnprintfT always returns the number of characters in a 1053 // fully-formatted string, so if we reach this point, something else is 1054 // wrong and no amount of buffer-doubling is going to fix it. 1055 if (errno != 0 && errno != EOVERFLOW) 1056#endif 1057 { 1058 // If an error other than overflow occurred, it's never going to work. 1059 DLOG(WARNING) << "Unable to printf the requested string due to error."; 1060 return; 1061 } 1062 // Try doubling the buffer size. 1063 mem_length *= 2; 1064 } else { 1065 // We need exactly "result + 1" characters. 1066 mem_length = result + 1; 1067 } 1068 1069 if (mem_length > 32 * 1024 * 1024) { 1070 // That should be plenty, don't try anything larger. This protects 1071 // against huge allocations when using vsnprintfT implementations that 1072 // return -1 for reasons other than overflow without setting errno. 1073 DLOG(WARNING) << "Unable to printf the requested string due to size."; 1074 return; 1075 } 1076 1077 std::vector<typename StringType::value_type> mem_buf(mem_length); 1078 1079 // NOTE: You can only use a va_list once. Since we're in a while loop, we 1080 // need to make a new copy each time so we don't use up the original. 1081 GG_VA_COPY(ap_copy, ap); 1082 result = vsnprintfT(&mem_buf[0], mem_length, format, ap_copy); 1083 va_end(ap_copy); 1084 1085 if ((result >= 0) && (result < mem_length)) { 1086 // It fit. 
1087 dst->append(&mem_buf[0], result); 1088 return; 1089 } 1090 } 1091} 1092 1093namespace { 1094 1095template <typename STR, typename INT, typename UINT, bool NEG> 1096struct IntToStringT { 1097 // This is to avoid a compiler warning about unary minus on unsigned type. 1098 // For example, say you had the following code: 1099 // template <typename INT> 1100 // INT abs(INT value) { return value < 0 ? -value : value; } 1101 // Even though if INT is unsigned, it's impossible for value < 0, so the 1102 // unary minus will never be taken, the compiler will still generate a 1103 // warning. We do a little specialization dance... 1104 template <typename INT2, typename UINT2, bool NEG2> 1105 struct ToUnsignedT { }; 1106 1107 template <typename INT2, typename UINT2> 1108 struct ToUnsignedT<INT2, UINT2, false> { 1109 static UINT2 ToUnsigned(INT2 value) { 1110 return static_cast<UINT2>(value); 1111 } 1112 }; 1113 1114 template <typename INT2, typename UINT2> 1115 struct ToUnsignedT<INT2, UINT2, true> { 1116 static UINT2 ToUnsigned(INT2 value) { 1117 return static_cast<UINT2>(value < 0 ? -value : value); 1118 } 1119 }; 1120 1121 // This set of templates is very similar to the above templates, but 1122 // for testing whether an integer is negative. 1123 template <typename INT2, bool NEG2> 1124 struct TestNegT { }; 1125 template <typename INT2> 1126 struct TestNegT<INT2, false> { 1127 static bool TestNeg(INT2 value) { 1128 // value is unsigned, and can never be negative. 1129 return false; 1130 } 1131 }; 1132 template <typename INT2> 1133 struct TestNegT<INT2, true> { 1134 static bool TestNeg(INT2 value) { 1135 return value < 0; 1136 } 1137 }; 1138 1139 static STR IntToString(INT value) { 1140 // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. 1141 // So round up to allocate 3 output characters per byte, plus 1 for '-'. 
1142 const int kOutputBufSize = 3 * sizeof(INT) + 1; 1143 1144 // Allocate the whole string right away, we will right back to front, and 1145 // then return the substr of what we ended up using. 1146 STR outbuf(kOutputBufSize, 0); 1147 1148 bool is_neg = TestNegT<INT, NEG>::TestNeg(value); 1149 // Even though is_neg will never be true when INT is parameterized as 1150 // unsigned, even the presence of the unary operation causes a warning. 1151 UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value); 1152 1153 for (typename STR::iterator it = outbuf.end();;) { 1154 --it; 1155 DCHECK(it != outbuf.begin()); 1156 *it = static_cast<typename STR::value_type>((res % 10) + '0'); 1157 res /= 10; 1158 1159 // We're done.. 1160 if (res == 0) { 1161 if (is_neg) { 1162 --it; 1163 DCHECK(it != outbuf.begin()); 1164 *it = static_cast<typename STR::value_type>('-'); 1165 } 1166 return STR(it, outbuf.end()); 1167 } 1168 } 1169 NOTREACHED(); 1170 return STR(); 1171 } 1172}; 1173 1174} 1175 1176std::string IntToString(int value) { 1177 return IntToStringT<std::string, int, unsigned int, true>:: 1178 IntToString(value); 1179} 1180std::wstring IntToWString(int value) { 1181 return IntToStringT<std::wstring, int, unsigned int, true>:: 1182 IntToString(value); 1183} 1184string16 IntToString16(int value) { 1185 return IntToStringT<string16, int, unsigned int, true>:: 1186 IntToString(value); 1187} 1188std::string UintToString(unsigned int value) { 1189 return IntToStringT<std::string, unsigned int, unsigned int, false>:: 1190 IntToString(value); 1191} 1192std::wstring UintToWString(unsigned int value) { 1193 return IntToStringT<std::wstring, unsigned int, unsigned int, false>:: 1194 IntToString(value); 1195} 1196string16 UintToString16(unsigned int value) { 1197 return IntToStringT<string16, unsigned int, unsigned int, false>:: 1198 IntToString(value); 1199} 1200std::string Int64ToString(int64 value) { 1201 return IntToStringT<std::string, int64, uint64, true>:: 1202 IntToString(value); 
1203} 1204std::wstring Int64ToWString(int64 value) { 1205 return IntToStringT<std::wstring, int64, uint64, true>:: 1206 IntToString(value); 1207} 1208std::string Uint64ToString(uint64 value) { 1209 return IntToStringT<std::string, uint64, uint64, false>:: 1210 IntToString(value); 1211} 1212std::wstring Uint64ToWString(uint64 value) { 1213 return IntToStringT<std::wstring, uint64, uint64, false>:: 1214 IntToString(value); 1215} 1216 1217std::string DoubleToString(double value) { 1218 // According to g_fmt.cc, it is sufficient to declare a buffer of size 32. 1219 char buffer[32]; 1220 dmg_fp::g_fmt(buffer, value); 1221 return std::string(buffer); 1222} 1223 1224std::wstring DoubleToWString(double value) { 1225 return ASCIIToWide(DoubleToString(value)); 1226} 1227 1228void StringAppendV(std::string* dst, const char* format, va_list ap) { 1229 StringAppendVT(dst, format, ap); 1230} 1231 1232void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) { 1233 StringAppendVT(dst, format, ap); 1234} 1235 1236std::string StringPrintf(const char* format, ...) { 1237 va_list ap; 1238 va_start(ap, format); 1239 std::string result; 1240 StringAppendV(&result, format, ap); 1241 va_end(ap); 1242 return result; 1243} 1244 1245std::wstring StringPrintf(const wchar_t* format, ...) { 1246 va_list ap; 1247 va_start(ap, format); 1248 std::wstring result; 1249 StringAppendV(&result, format, ap); 1250 va_end(ap); 1251 return result; 1252} 1253 1254std::string StringPrintV(const char* format, va_list ap) { 1255 std::string result; 1256 StringAppendV(&result, format, ap); 1257 return result; 1258} 1259 1260const std::string& SStringPrintf(std::string* dst, const char* format, ...) { 1261 va_list ap; 1262 va_start(ap, format); 1263 dst->clear(); 1264 StringAppendV(dst, format, ap); 1265 va_end(ap); 1266 return *dst; 1267} 1268 1269const std::wstring& SStringPrintf(std::wstring* dst, 1270 const wchar_t* format, ...) 
{ 1271 va_list ap; 1272 va_start(ap, format); 1273 dst->clear(); 1274 StringAppendV(dst, format, ap); 1275 va_end(ap); 1276 return *dst; 1277} 1278 1279void StringAppendF(std::string* dst, const char* format, ...) { 1280 va_list ap; 1281 va_start(ap, format); 1282 StringAppendV(dst, format, ap); 1283 va_end(ap); 1284} 1285 1286void StringAppendF(std::wstring* dst, const wchar_t* format, ...) { 1287 va_list ap; 1288 va_start(ap, format); 1289 StringAppendV(dst, format, ap); 1290 va_end(ap); 1291} 1292 1293template<typename STR> 1294static void SplitStringT(const STR& str, 1295 const typename STR::value_type s, 1296 bool trim_whitespace, 1297 std::vector<STR>* r) { 1298 size_t last = 0; 1299 size_t i; 1300 size_t c = str.size(); 1301 for (i = 0; i <= c; ++i) { 1302 if (i == c || str[i] == s) { 1303 size_t len = i - last; 1304 STR tmp = str.substr(last, len); 1305 if (trim_whitespace) { 1306 STR t_tmp; 1307 TrimWhitespace(tmp, TRIM_ALL, &t_tmp); 1308 r->push_back(t_tmp); 1309 } else { 1310 r->push_back(tmp); 1311 } 1312 last = i + 1; 1313 } 1314 } 1315} 1316 1317void SplitString(const std::wstring& str, 1318 wchar_t s, 1319 std::vector<std::wstring>* r) { 1320 SplitStringT(str, s, true, r); 1321} 1322 1323#if !defined(WCHAR_T_IS_UTF16) 1324void SplitString(const string16& str, 1325 char16 s, 1326 std::vector<string16>* r) { 1327 SplitStringT(str, s, true, r); 1328} 1329#endif 1330 1331void SplitString(const std::string& str, 1332 char s, 1333 std::vector<std::string>* r) { 1334 SplitStringT(str, s, true, r); 1335} 1336 1337void SplitStringDontTrim(const std::wstring& str, 1338 wchar_t s, 1339 std::vector<std::wstring>* r) { 1340 SplitStringT(str, s, false, r); 1341} 1342 1343#if !defined(WCHAR_T_IS_UTF16) 1344void SplitStringDontTrim(const string16& str, 1345 char16 s, 1346 std::vector<string16>* r) { 1347 SplitStringT(str, s, false, r); 1348} 1349#endif 1350 1351void SplitStringDontTrim(const std::string& str, 1352 char s, 1353 std::vector<std::string>* r) { 1354 
SplitStringT(str, s, false, r); 1355} 1356 1357template <typename STR> 1358static void SplitStringUsingSubstrT(const STR& str, 1359 const STR& s, 1360 std::vector<STR>* r) { 1361 typename STR::size_type begin_index = 0; 1362 while (true) { 1363 const typename STR::size_type end_index = str.find(s, begin_index); 1364 if (end_index == STR::npos) { 1365 const STR term = str.substr(begin_index); 1366 STR tmp; 1367 TrimWhitespace(term, TRIM_ALL, &tmp); 1368 r->push_back(tmp); 1369 return; 1370 } 1371 const STR term = str.substr(begin_index, end_index - begin_index); 1372 STR tmp; 1373 TrimWhitespace(term, TRIM_ALL, &tmp); 1374 r->push_back(tmp); 1375 begin_index = end_index + s.size(); 1376 } 1377} 1378 1379void SplitStringUsingSubstr(const string16& str, 1380 const string16& s, 1381 std::vector<string16>* r) { 1382 SplitStringUsingSubstrT(str, s, r); 1383} 1384 1385void SplitStringUsingSubstr(const std::string& str, 1386 const std::string& s, 1387 std::vector<std::string>* r) { 1388 SplitStringUsingSubstrT(str, s, r); 1389} 1390 1391template<typename STR> 1392static size_t TokenizeT(const STR& str, 1393 const STR& delimiters, 1394 std::vector<STR>* tokens) { 1395 tokens->clear(); 1396 1397 typename STR::size_type start = str.find_first_not_of(delimiters); 1398 while (start != STR::npos) { 1399 typename STR::size_type end = str.find_first_of(delimiters, start + 1); 1400 if (end == STR::npos) { 1401 tokens->push_back(str.substr(start)); 1402 break; 1403 } else { 1404 tokens->push_back(str.substr(start, end - start)); 1405 start = str.find_first_not_of(delimiters, end + 1); 1406 } 1407 } 1408 1409 return tokens->size(); 1410} 1411 1412size_t Tokenize(const std::wstring& str, 1413 const std::wstring& delimiters, 1414 std::vector<std::wstring>* tokens) { 1415 return TokenizeT(str, delimiters, tokens); 1416} 1417 1418#if !defined(WCHAR_T_IS_UTF16) 1419size_t Tokenize(const string16& str, 1420 const string16& delimiters, 1421 std::vector<string16>* tokens) { 1422 return 
TokenizeT(str, delimiters, tokens); 1423} 1424#endif 1425 1426size_t Tokenize(const std::string& str, 1427 const std::string& delimiters, 1428 std::vector<std::string>* tokens) { 1429 return TokenizeT(str, delimiters, tokens); 1430} 1431 1432size_t Tokenize(const base::StringPiece& str, 1433 const base::StringPiece& delimiters, 1434 std::vector<base::StringPiece>* tokens) { 1435 return TokenizeT(str, delimiters, tokens); 1436} 1437 1438template<typename STR> 1439static STR JoinStringT(const std::vector<STR>& parts, 1440 typename STR::value_type sep) { 1441 if (parts.size() == 0) return STR(); 1442 1443 STR result(parts[0]); 1444 typename std::vector<STR>::const_iterator iter = parts.begin(); 1445 ++iter; 1446 1447 for (; iter != parts.end(); ++iter) { 1448 result += sep; 1449 result += *iter; 1450 } 1451 1452 return result; 1453} 1454 1455std::string JoinString(const std::vector<std::string>& parts, char sep) { 1456 return JoinStringT(parts, sep); 1457} 1458 1459#if !defined(WCHAR_T_IS_UTF16) 1460string16 JoinString(const std::vector<string16>& parts, char16 sep) { 1461 return JoinStringT(parts, sep); 1462} 1463#endif 1464 1465std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t sep) { 1466 return JoinStringT(parts, sep); 1467} 1468 1469template<typename STR> 1470void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { 1471 const size_t length = str.length(); 1472 if (!length) 1473 return; 1474 1475 bool last_was_ws = false; 1476 size_t last_non_ws_start = 0; 1477 for (size_t i = 0; i < length; ++i) { 1478 switch (str[i]) { 1479 // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. 1480 case L' ': 1481 case L'\t': 1482 case L'\xA': 1483 case L'\xB': 1484 case L'\xC': 1485 case L'\xD': 1486 if (!last_was_ws) { 1487 if (i > 0) { 1488 result->push_back( 1489 str.substr(last_non_ws_start, i - last_non_ws_start)); 1490 } 1491 last_was_ws = true; 1492 } 1493 break; 1494 1495 default: // Not a space character. 
1496 if (last_was_ws) { 1497 last_was_ws = false; 1498 last_non_ws_start = i; 1499 } 1500 break; 1501 } 1502 } 1503 if (!last_was_ws) { 1504 result->push_back( 1505 str.substr(last_non_ws_start, length - last_non_ws_start)); 1506 } 1507} 1508 1509void SplitStringAlongWhitespace(const std::wstring& str, 1510 std::vector<std::wstring>* result) { 1511 SplitStringAlongWhitespaceT(str, result); 1512} 1513 1514#if !defined(WCHAR_T_IS_UTF16) 1515void SplitStringAlongWhitespace(const string16& str, 1516 std::vector<string16>* result) { 1517 SplitStringAlongWhitespaceT(str, result); 1518} 1519#endif 1520 1521void SplitStringAlongWhitespace(const std::string& str, 1522 std::vector<std::string>* result) { 1523 SplitStringAlongWhitespaceT(str, result); 1524} 1525 1526template<class FormatStringType, class OutStringType> 1527OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 1528 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 1529 size_t substitutions = subst.size(); 1530 DCHECK(substitutions < 10); 1531 1532 size_t sub_length = 0; 1533 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 1534 iter != subst.end(); ++iter) { 1535 sub_length += (*iter).length(); 1536 } 1537 1538 OutStringType formatted; 1539 formatted.reserve(format_string.length() + sub_length); 1540 1541 std::vector<ReplacementOffset> r_offsets; 1542 for (typename FormatStringType::const_iterator i = format_string.begin(); 1543 i != format_string.end(); ++i) { 1544 if ('$' == *i) { 1545 if (i + 1 != format_string.end()) { 1546 ++i; 1547 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 1548 if ('$' == *i) { 1549 formatted.push_back('$'); 1550 } else { 1551 uintptr_t index = *i - '1'; 1552 if (offsets) { 1553 ReplacementOffset r_offset(index, 1554 static_cast<int>(formatted.size())); 1555 r_offsets.insert(std::lower_bound(r_offsets.begin(), 1556 r_offsets.end(), r_offset, 1557 &CompareParameter), 1558 r_offset); 1559 
} 1560 if (index < substitutions) 1561 formatted.append(subst.at(index)); 1562 } 1563 } 1564 } else { 1565 formatted.push_back(*i); 1566 } 1567 } 1568 if (offsets) { 1569 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 1570 i != r_offsets.end(); ++i) { 1571 offsets->push_back(i->offset); 1572 } 1573 } 1574 return formatted; 1575} 1576 1577string16 ReplaceStringPlaceholders(const string16& format_string, 1578 const std::vector<string16>& subst, 1579 std::vector<size_t>* offsets) { 1580 return DoReplaceStringPlaceholders(format_string, subst, offsets); 1581} 1582 1583std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 1584 const std::vector<std::string>& subst, 1585 std::vector<size_t>* offsets) { 1586 return DoReplaceStringPlaceholders(format_string, subst, offsets); 1587} 1588 1589string16 ReplaceStringPlaceholders(const string16& format_string, 1590 const string16& a, 1591 size_t* offset) { 1592 std::vector<size_t> offsets; 1593 std::vector<string16> subst; 1594 subst.push_back(a); 1595 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 1596 1597 DCHECK(offsets.size() == 1); 1598 if (offset) { 1599 *offset = offsets[0]; 1600 } 1601 return result; 1602} 1603 1604template <class CHAR> 1605static bool IsWildcard(CHAR character) { 1606 return character == '*' || character == '?'; 1607} 1608 1609// Move the strings pointers to the point where they start to differ. 1610template <class CHAR> 1611static void EatSameChars(const CHAR** pattern, const CHAR** string) { 1612 bool escaped = false; 1613 while (**pattern && **string) { 1614 if (!escaped && IsWildcard(**pattern)) { 1615 // We don't want to match wildcard here, except if it's escaped. 1616 return; 1617 } 1618 1619 // Check if the escapement char is found. If so, skip it and move to the 1620 // next character. 
1621 if (!escaped && **pattern == L'\\') { 1622 escaped = true; 1623 (*pattern)++; 1624 continue; 1625 } 1626 1627 // Check if the chars match, if so, increment the ptrs. 1628 if (**pattern == **string) { 1629 (*pattern)++; 1630 (*string)++; 1631 } else { 1632 // Uh ho, it did not match, we are done. If the last char was an 1633 // escapement, that means that it was an error to advance the ptr here, 1634 // let's put it back where it was. This also mean that the MatchPattern 1635 // function will return false because if we can't match an escape char 1636 // here, then no one will. 1637 if (escaped) { 1638 (*pattern)--; 1639 } 1640 return; 1641 } 1642 1643 escaped = false; 1644 } 1645} 1646 1647template <class CHAR> 1648static void EatWildcard(const CHAR** pattern) { 1649 while (**pattern) { 1650 if (!IsWildcard(**pattern)) 1651 return; 1652 (*pattern)++; 1653 } 1654} 1655 1656template <class CHAR> 1657static bool MatchPatternT(const CHAR* eval, const CHAR* pattern, int depth) { 1658 const int kMaxDepth = 16; 1659 if (depth > kMaxDepth) 1660 return false; 1661 1662 // Eat all the matching chars. 1663 EatSameChars(&pattern, &eval); 1664 1665 // If the string is empty, then the pattern must be empty too, or contains 1666 // only wildcards. 1667 if (*eval == 0) { 1668 EatWildcard(&pattern); 1669 if (*pattern) 1670 return false; 1671 return true; 1672 } 1673 1674 // Pattern is empty but not string, this is not a match. 1675 if (*pattern == 0) 1676 return false; 1677 1678 // If this is a question mark, then we need to compare the rest with 1679 // the current string or the string with one character eaten. 1680 if (pattern[0] == '?') { 1681 if (MatchPatternT(eval, pattern + 1, depth + 1) || 1682 MatchPatternT(eval + 1, pattern + 1, depth + 1)) 1683 return true; 1684 } 1685 1686 // This is a *, try to match all the possible substrings with the remainder 1687 // of the pattern. 
1688 if (pattern[0] == '*') { 1689 while (*eval) { 1690 if (MatchPatternT(eval, pattern + 1, depth + 1)) 1691 return true; 1692 eval++; 1693 } 1694 1695 // We reached the end of the string, let see if the pattern contains only 1696 // wildcards. 1697 if (*eval == 0) { 1698 EatWildcard(&pattern); 1699 if (*pattern) 1700 return false; 1701 return true; 1702 } 1703 } 1704 1705 return false; 1706} 1707 1708bool MatchPatternWide(const std::wstring& eval, const std::wstring& pattern) { 1709 return MatchPatternT(eval.c_str(), pattern.c_str(), 0); 1710} 1711 1712bool MatchPatternASCII(const std::string& eval, const std::string& pattern) { 1713 DCHECK(IsStringASCII(eval) && IsStringASCII(pattern)); 1714 return MatchPatternT(eval.c_str(), pattern.c_str(), 0); 1715} 1716 1717bool StringToInt(const std::string& input, int* output) { 1718 return StringToNumber<StringToIntTraits>(input, output); 1719} 1720 1721bool StringToInt(const string16& input, int* output) { 1722 return StringToNumber<String16ToIntTraits>(input, output); 1723} 1724 1725bool StringToInt64(const std::string& input, int64* output) { 1726 return StringToNumber<StringToInt64Traits>(input, output); 1727} 1728 1729bool StringToInt64(const string16& input, int64* output) { 1730 return StringToNumber<String16ToInt64Traits>(input, output); 1731} 1732 1733bool HexStringToInt(const std::string& input, int* output) { 1734 return StringToNumber<HexStringToIntTraits>(input, output); 1735} 1736 1737bool HexStringToInt(const string16& input, int* output) { 1738 return StringToNumber<HexString16ToIntTraits>(input, output); 1739} 1740 1741namespace { 1742 1743template<class CHAR> 1744bool HexDigitToIntT(const CHAR digit, uint8* val) { 1745 if (digit >= '0' && digit <= '9') 1746 *val = digit - '0'; 1747 else if (digit >= 'a' && digit <= 'f') 1748 *val = 10 + digit - 'a'; 1749 else if (digit >= 'A' && digit <= 'F') 1750 *val = 10 + digit - 'A'; 1751 else 1752 return false; 1753 return true; 1754} 1755 1756template<typename 
STR> 1757bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) { 1758 DCHECK(output->size() == 0); 1759 size_t count = input.size(); 1760 if (count == 0 || (count % 2) != 0) 1761 return false; 1762 for (uintptr_t i = 0; i < count / 2; ++i) { 1763 uint8 msb = 0; // most significant 4 bits 1764 uint8 lsb = 0; // least significant 4 bits 1765 if (!HexDigitToIntT(input[i * 2], &msb) || 1766 !HexDigitToIntT(input[i * 2 + 1], &lsb)) 1767 return false; 1768 output->push_back((msb << 4) | lsb); 1769 } 1770 return true; 1771} 1772 1773} // namespace 1774 1775bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) { 1776 return HexStringToBytesT(input, output); 1777} 1778 1779bool HexStringToBytes(const string16& input, std::vector<uint8>* output) { 1780 return HexStringToBytesT(input, output); 1781} 1782 1783int StringToInt(const std::string& value) { 1784 int result; 1785 StringToInt(value, &result); 1786 return result; 1787} 1788 1789int StringToInt(const string16& value) { 1790 int result; 1791 StringToInt(value, &result); 1792 return result; 1793} 1794 1795int64 StringToInt64(const std::string& value) { 1796 int64 result; 1797 StringToInt64(value, &result); 1798 return result; 1799} 1800 1801int64 StringToInt64(const string16& value) { 1802 int64 result; 1803 StringToInt64(value, &result); 1804 return result; 1805} 1806 1807int HexStringToInt(const std::string& value) { 1808 int result; 1809 HexStringToInt(value, &result); 1810 return result; 1811} 1812 1813int HexStringToInt(const string16& value) { 1814 int result; 1815 HexStringToInt(value, &result); 1816 return result; 1817} 1818 1819bool StringToDouble(const std::string& input, double* output) { 1820 return StringToNumber<StringToDoubleTraits>(input, output); 1821} 1822 1823bool StringToDouble(const string16& input, double* output) { 1824 return StringToNumber<String16ToDoubleTraits>(input, output); 1825} 1826 1827double StringToDouble(const std::string& value) { 1828 double 
result; 1829 StringToDouble(value, &result); 1830 return result; 1831} 1832 1833double StringToDouble(const string16& value) { 1834 double result; 1835 StringToDouble(value, &result); 1836 return result; 1837} 1838 1839// The following code is compatible with the OpenBSD lcpy interface. See: 1840// http://www.gratisoft.us/todd/papers/strlcpy.html 1841// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 1842 1843namespace { 1844 1845template <typename CHAR> 1846size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 1847 for (size_t i = 0; i < dst_size; ++i) { 1848 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 1849 return i; 1850 } 1851 1852 // We were left off at dst_size. We over copied 1 byte. Null terminate. 1853 if (dst_size != 0) 1854 dst[dst_size - 1] = 0; 1855 1856 // Count the rest of the |src|, and return it's length in characters. 1857 while (src[dst_size]) ++dst_size; 1858 return dst_size; 1859} 1860 1861} // namespace 1862 1863size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 1864 return lcpyT<char>(dst, src, dst_size); 1865} 1866size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 1867 return lcpyT<wchar_t>(dst, src, dst_size); 1868} 1869 1870bool ElideString(const std::wstring& input, int max_len, std::wstring* output) { 1871 DCHECK(max_len >= 0); 1872 if (static_cast<int>(input.length()) <= max_len) { 1873 output->assign(input); 1874 return false; 1875 } 1876 1877 switch (max_len) { 1878 case 0: 1879 output->clear(); 1880 break; 1881 case 1: 1882 output->assign(input.substr(0, 1)); 1883 break; 1884 case 2: 1885 output->assign(input.substr(0, 2)); 1886 break; 1887 case 3: 1888 output->assign(input.substr(0, 1) + L"." + 1889 input.substr(input.length() - 1)); 1890 break; 1891 case 4: 1892 output->assign(input.substr(0, 1) + L".." 
+ 1893 input.substr(input.length() - 1)); 1894 break; 1895 default: { 1896 int rstr_len = (max_len - 3) / 2; 1897 int lstr_len = rstr_len + ((max_len - 3) % 2); 1898 output->assign(input.substr(0, lstr_len) + L"..." + 1899 input.substr(input.length() - rstr_len)); 1900 break; 1901 } 1902 } 1903 1904 return true; 1905} 1906 1907std::string HexEncode(const void* bytes, size_t size) { 1908 static const char kHexChars[] = "0123456789ABCDEF"; 1909 1910 // Each input byte creates two output hex characters. 1911 std::string ret(size * 2, '\0'); 1912 1913 for (size_t i = 0; i < size; ++i) { 1914 char b = reinterpret_cast<const char*>(bytes)[i]; 1915 ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; 1916 ret[(i * 2) + 1] = kHexChars[b & 0xf]; 1917 } 1918 return ret; 1919} 1920