string_util.h revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// This file defines utility functions for working with strings. 6 7#ifndef BASE_STRING_UTIL_H_ 8#define BASE_STRING_UTIL_H_ 9 10#include <stdarg.h> // va_list 11 12#include <string> 13#include <vector> 14 15#include "base/basictypes.h" 16#include "base/compiler_specific.h" 17#include "base/string16.h" 18#include "base/string_piece.h" // For implicit conversions. 19 20// Safe standard library wrappers for all platforms. 21 22namespace base { 23 24// C standard-library functions like "strncasecmp" and "snprintf" that aren't 25// cross-platform are provided as "base::strncasecmp", and their prototypes 26// are listed below. These functions are then implemented as inline calls 27// to the platform-specific equivalents in the platform-specific headers. 28 29// Compares the two strings s1 and s2 without regard to case using 30// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 31// s2 > s1 according to a lexicographic comparison. 32int strcasecmp(const char* s1, const char* s2); 33 34// Compares up to count characters of s1 and s2 without regard to case using 35// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 36// s2 > s1 according to a lexicographic comparison. 37int strncasecmp(const char* s1, const char* s2, size_t count); 38 39// Same as strncmp but for char16 strings. 40int strncmp16(const char16* s1, const char16* s2, size_t count); 41 42// Wrapper for vsnprintf that always null-terminates and always returns the 43// number of characters that would be in an untruncated formatted 44// string, even when truncation occurs. 45int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 46 PRINTF_FORMAT(3, 0); 47 48// vswprintf always null-terminates, but when truncation occurs, it will either 49// return -1 or the number of characters that would be in an untruncated 50// formatted string. The actual return value depends on the underlying 51// C library's vswprintf implementation. 52int vswprintf(wchar_t* buffer, size_t size, 53 const wchar_t* format, va_list arguments) WPRINTF_FORMAT(3, 0); 54 55// Some of these implementations need to be inlined. 56 57// We separate the declaration from the implementation of this inline 58// function just so the PRINTF_FORMAT works. 59inline int snprintf(char* buffer, size_t size, const char* format, ...) 60 PRINTF_FORMAT(3, 4); 61inline int snprintf(char* buffer, size_t size, const char* format, ...) { 62 va_list arguments; 63 va_start(arguments, format); 64 int result = vsnprintf(buffer, size, format, arguments); 65 va_end(arguments); 66 return result; 67} 68 69// We separate the declaration from the implementation of this inline 70// function just so the WPRINTF_FORMAT works. 71inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) 72 WPRINTF_FORMAT(3, 4); 73inline int swprintf(wchar_t* buffer, size_t size, const wchar_t* format, ...) { 74 va_list arguments; 75 va_start(arguments, format); 76 int result = vswprintf(buffer, size, format, arguments); 77 va_end(arguments); 78 return result; 79} 80 81// BSD-style safe and consistent string copy functions. 82// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 83// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 84// long as |dst_size| is not 0. Returns the length of |src| in characters. 85// If the return value is >= dst_size, then the output was truncated. 86// NOTE: All sizes are in number of characters, NOT in bytes. 87size_t strlcpy(char* dst, const char* src, size_t dst_size); 88size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 89 90// Scan a wprintf format string to determine whether it's portable across a 91// variety of systems. This function only checks that the conversion 92// specifiers used by the format string are supported and have the same meaning 93// on a variety of systems. It doesn't check for other errors that might occur 94// within a format string. 95// 96// Nonportable conversion specifiers for wprintf are: 97// - 's' and 'c' without an 'l' length modifier. %s and %c operate on char 98// data on all systems except Windows, which treat them as wchar_t data. 99// Use %ls and %lc for wchar_t data instead. 100// - 'S' and 'C', which operate on wchar_t data on all systems except Windows, 101// which treat them as char data. Use %ls and %lc for wchar_t data 102// instead. 103// - 'F', which is not identified by Windows wprintf documentation. 104// - 'D', 'O', and 'U', which are deprecated and not available on all systems. 105// Use %ld, %lo, and %lu instead. 106// 107// Note that there is no portable conversion specifier for char data when 108// working with wprintf. 109// 110// This function is intended to be called from base::vswprintf. 111bool IsWprintfFormatPortable(const wchar_t* format); 112 113} // namespace base 114 115#if defined(OS_WIN) 116#include "base/string_util_win.h" 117#elif defined(OS_POSIX) 118#include "base/string_util_posix.h" 119#else 120#error Define string operations appropriately for your platform 121#endif 122 123// These threadsafe functions return references to globally unique empty 124// strings. 125// 126// DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT CONSTRUCTORS. 127// There is only one case where you should use these: functions which need to 128// return a string by reference (e.g. as a class member accessor), and don't 129// have an empty string to use (e.g. in an error case). These should not be 130// used as initializers, function arguments, or return values for functions 131// which return by value or outparam. 132const std::string& EmptyString(); 133const std::wstring& EmptyWString(); 134const string16& EmptyString16(); 135 136extern const wchar_t kWhitespaceWide[]; 137extern const char16 kWhitespaceUTF16[]; 138extern const char kWhitespaceASCII[]; 139 140extern const char kUtf8ByteOrderMark[]; 141 142// Removes characters in remove_chars from anywhere in input. Returns true if 143// any characters were removed. 144// NOTE: Safe to use the same variable for both input and output. 145bool RemoveChars(const std::wstring& input, 146 const wchar_t remove_chars[], 147 std::wstring* output); 148bool RemoveChars(const string16& input, 149 const char16 remove_chars[], 150 string16* output); 151bool RemoveChars(const std::string& input, 152 const char remove_chars[], 153 std::string* output); 154 155// Removes characters in trim_chars from the beginning and end of input. 156// NOTE: Safe to use the same variable for both input and output. 157bool TrimString(const std::wstring& input, 158 const wchar_t trim_chars[], 159 std::wstring* output); 160bool TrimString(const string16& input, 161 const char16 trim_chars[], 162 string16* output); 163bool TrimString(const std::string& input, 164 const char trim_chars[], 165 std::string* output); 166 167// Truncates a string to the nearest UTF-8 character that will leave 168// the string less than or equal to the specified byte size. 169void TruncateUTF8ToByteSize(const std::string& input, 170 const size_t byte_size, 171 std::string* output); 172 173// Trims any whitespace from either end of the input string. Returns where 174// whitespace was found. 175// The non-wide version has two functions: 176// * TrimWhitespaceASCII() 177// This function is for ASCII strings and only looks for ASCII whitespace; 178// Please choose the best one according to your usage. 179// NOTE: Safe to use the same variable for both input and output. 180enum TrimPositions { 181 TRIM_NONE = 0, 182 TRIM_LEADING = 1 << 0, 183 TRIM_TRAILING = 1 << 1, 184 TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, 185}; 186TrimPositions TrimWhitespace(const std::wstring& input, 187 TrimPositions positions, 188 std::wstring* output); 189TrimPositions TrimWhitespace(const string16& input, 190 TrimPositions positions, 191 string16* output); 192TrimPositions TrimWhitespaceASCII(const std::string& input, 193 TrimPositions positions, 194 std::string* output); 195 196// Deprecated. This function is only for backward compatibility and calls 197// TrimWhitespaceASCII(). 198TrimPositions TrimWhitespace(const std::string& input, 199 TrimPositions positions, 200 std::string* output); 201 202// Searches for CR or LF characters. Removes all contiguous whitespace 203// strings that contain them. This is useful when trying to deal with text 204// copied from terminals. 205// Returns |text|, with the following three transformations: 206// (1) Leading and trailing whitespace is trimmed. 207// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace 208// sequences containing a CR or LF are trimmed. 209// (3) All other whitespace sequences are converted to single spaces. 210std::wstring CollapseWhitespace(const std::wstring& text, 211 bool trim_sequences_with_line_breaks); 212string16 CollapseWhitespace(const string16& text, 213 bool trim_sequences_with_line_breaks); 214std::string CollapseWhitespaceASCII(const std::string& text, 215 bool trim_sequences_with_line_breaks); 216 217// Returns true if the passed string is empty or contains only white-space 218// characters. 219bool ContainsOnlyWhitespaceASCII(const std::string& str); 220bool ContainsOnlyWhitespace(const string16& str); 221 222// Returns true if |input| is empty or contains only characters found in 223// |characters|. 224bool ContainsOnlyChars(const std::wstring& input, 225 const std::wstring& characters); 226bool ContainsOnlyChars(const string16& input, const string16& characters); 227bool ContainsOnlyChars(const std::string& input, const std::string& characters); 228 229// These convert between ASCII (7-bit) and Wide/UTF16 strings. 230std::string WideToASCII(const std::wstring& wide); 231std::wstring ASCIIToWide(const base::StringPiece& ascii); 232std::string UTF16ToASCII(const string16& utf16); 233string16 ASCIIToUTF16(const base::StringPiece& ascii); 234 235// Converts the given wide string to the corresponding Latin1. This will fail 236// (return false) if any characters are more than 255. 237bool WideToLatin1(const std::wstring& wide, std::string* latin1); 238 239// Returns true if the specified string matches the criteria. How can a wide 240// string be 8-bit or UTF8? It contains only characters that are < 256 (in the 241// first case) or characters that use only 8-bits and whose 8-bit 242// representation looks like a UTF-8 string (the second case). 243// 244// Note that IsStringUTF8 checks not only if the input is structrually 245// valid but also if it doesn't contain any non-character codepoint 246// (e.g. U+FFFE). It's done on purpose because all the existing callers want 247// to have the maximum 'discriminating' power from other encodings. If 248// there's a use case for just checking the structural validity, we have to 249// add a new function for that. 250bool IsString8Bit(const std::wstring& str); 251bool IsStringUTF8(const std::string& str); 252bool IsStringASCII(const std::wstring& str); 253bool IsStringASCII(const base::StringPiece& str); 254bool IsStringASCII(const string16& str); 255 256// ASCII-specific tolower. The standard library's tolower is locale sensitive, 257// so we don't want to use it here. 258template <class Char> inline Char ToLowerASCII(Char c) { 259 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 260} 261 262// Converts the elements of the given string. This version uses a pointer to 263// clearly differentiate it from the non-pointer variant. 264template <class str> inline void StringToLowerASCII(str* s) { 265 for (typename str::iterator i = s->begin(); i != s->end(); ++i) 266 *i = ToLowerASCII(*i); 267} 268 269template <class str> inline str StringToLowerASCII(const str& s) { 270 // for std::string and std::wstring 271 str output(s); 272 StringToLowerASCII(&output); 273 return output; 274} 275 276// ASCII-specific toupper. The standard library's toupper is locale sensitive, 277// so we don't want to use it here. 278template <class Char> inline Char ToUpperASCII(Char c) { 279 return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; 280} 281 282// Converts the elements of the given string. This version uses a pointer to 283// clearly differentiate it from the non-pointer variant. 284template <class str> inline void StringToUpperASCII(str* s) { 285 for (typename str::iterator i = s->begin(); i != s->end(); ++i) 286 *i = ToUpperASCII(*i); 287} 288 289template <class str> inline str StringToUpperASCII(const str& s) { 290 // for std::string and std::wstring 291 str output(s); 292 StringToUpperASCII(&output); 293 return output; 294} 295 296// Compare the lower-case form of the given string against the given ASCII 297// string. This is useful for doing checking if an input string matches some 298// token, and it is optimized to avoid intermediate string copies. This API is 299// borrowed from the equivalent APIs in Mozilla. 300bool LowerCaseEqualsASCII(const std::string& a, const char* b); 301bool LowerCaseEqualsASCII(const std::wstring& a, const char* b); 302bool LowerCaseEqualsASCII(const string16& a, const char* b); 303 304// Same thing, but with string iterators instead. 305bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 306 std::string::const_iterator a_end, 307 const char* b); 308bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin, 309 std::wstring::const_iterator a_end, 310 const char* b); 311bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 312 string16::const_iterator a_end, 313 const char* b); 314bool LowerCaseEqualsASCII(const char* a_begin, 315 const char* a_end, 316 const char* b); 317bool LowerCaseEqualsASCII(const wchar_t* a_begin, 318 const wchar_t* a_end, 319 const char* b); 320bool LowerCaseEqualsASCII(const char16* a_begin, 321 const char16* a_end, 322 const char* b); 323 324// Performs a case-sensitive string compare. The behavior is undefined if both 325// strings are not ASCII. 326bool EqualsASCII(const string16& a, const base::StringPiece& b); 327 328// Returns true if str starts with search, or false otherwise. 329bool StartsWithASCII(const std::string& str, 330 const std::string& search, 331 bool case_sensitive); 332bool StartsWith(const std::wstring& str, 333 const std::wstring& search, 334 bool case_sensitive); 335bool StartsWith(const string16& str, 336 const string16& search, 337 bool case_sensitive); 338 339// Returns true if str ends with search, or false otherwise. 340bool EndsWith(const std::string& str, 341 const std::string& search, 342 bool case_sensitive); 343bool EndsWith(const std::wstring& str, 344 const std::wstring& search, 345 bool case_sensitive); 346bool EndsWith(const string16& str, 347 const string16& search, 348 bool case_sensitive); 349 350 351// Determines the type of ASCII character, independent of locale (the C 352// library versions will change based on locale). 353template <typename Char> 354inline bool IsAsciiWhitespace(Char c) { 355 return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 356} 357template <typename Char> 358inline bool IsAsciiAlpha(Char c) { 359 return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); 360} 361template <typename Char> 362inline bool IsAsciiDigit(Char c) { 363 return c >= '0' && c <= '9'; 364} 365 366// Returns true if it's a whitespace character. 367inline bool IsWhitespace(wchar_t c) { 368 return wcschr(kWhitespaceWide, c) != NULL; 369} 370 371enum DataUnits { 372 DATA_UNITS_BYTE = 0, 373 DATA_UNITS_KIBIBYTE, 374 DATA_UNITS_MEBIBYTE, 375 DATA_UNITS_GIBIBYTE, 376}; 377 378// Return the unit type that is appropriate for displaying the amount of bytes 379// passed in. 380DataUnits GetByteDisplayUnits(int64 bytes); 381 382// Return a byte string in human-readable format, displayed in units appropriate 383// specified by 'units', with an optional unit suffix. 384// Ex: FormatBytes(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB" 385// Ex: FormatBytes(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" 386std::wstring FormatBytes(int64 bytes, DataUnits units, bool show_units); 387 388// As above, but with "/s" units. 389// Ex: FormatSpeed(512, DATA_UNITS_KIBIBYTE, true) => "0.5 KB/s" 390// Ex: FormatSpeed(10*1024, DATA_UNITS_MEBIBYTE, false) => "0.1" 391std::wstring FormatSpeed(int64 bytes, DataUnits units, bool show_units); 392 393// Return a number formated with separators in the user's locale way. 394// Ex: FormatNumber(1234567) => 1,234,567 395std::wstring FormatNumber(int64 number); 396 397// Starting at |start_offset| (usually 0), replace the first instance of 398// |find_this| with |replace_with|. 399void ReplaceFirstSubstringAfterOffset(string16* str, 400 string16::size_type start_offset, 401 const string16& find_this, 402 const string16& replace_with); 403void ReplaceFirstSubstringAfterOffset(std::string* str, 404 std::string::size_type start_offset, 405 const std::string& find_this, 406 const std::string& replace_with); 407 408// Starting at |start_offset| (usually 0), look through |str| and replace all 409// instances of |find_this| with |replace_with|. 410// 411// This does entire substrings; use std::replace in <algorithm> for single 412// characters, for example: 413// std::replace(str.begin(), str.end(), 'a', 'b'); 414void ReplaceSubstringsAfterOffset(string16* str, 415 string16::size_type start_offset, 416 const string16& find_this, 417 const string16& replace_with); 418void ReplaceSubstringsAfterOffset(std::string* str, 419 std::string::size_type start_offset, 420 const std::string& find_this, 421 const std::string& replace_with); 422 423// Specialized string-conversion functions. 424std::string IntToString(int value); 425std::wstring IntToWString(int value); 426string16 IntToString16(int value); 427std::string UintToString(unsigned int value); 428std::wstring UintToWString(unsigned int value); 429string16 UintToString16(unsigned int value); 430std::string Int64ToString(int64 value); 431std::wstring Int64ToWString(int64 value); 432std::string Uint64ToString(uint64 value); 433std::wstring Uint64ToWString(uint64 value); 434// The DoubleToString methods convert the double to a string format that 435// ignores the locale. If you want to use locale specific formatting, use ICU. 436std::string DoubleToString(double value); 437std::wstring DoubleToWString(double value); 438 439// Perform a best-effort conversion of the input string to a numeric type, 440// setting |*output| to the result of the conversion. Returns true for 441// "perfect" conversions; returns false in the following cases: 442// - Overflow/underflow. |*output| will be set to the maximum value supported 443// by the data type. 444// - Trailing characters in the string after parsing the number. |*output| 445// will be set to the value of the number that was parsed. 446// - No characters parseable as a number at the beginning of the string. 447// |*output| will be set to 0. 448// - Empty string. |*output| will be set to 0. 449bool StringToInt(const std::string& input, int* output); 450bool StringToInt(const string16& input, int* output); 451bool StringToInt64(const std::string& input, int64* output); 452bool StringToInt64(const string16& input, int64* output); 453bool HexStringToInt(const std::string& input, int* output); 454bool HexStringToInt(const string16& input, int* output); 455 456// Similar to the previous functions, except that output is a vector of bytes. 457// |*output| will contain as many bytes as were successfully parsed prior to the 458// error. There is no overflow, but input.size() must be evenly divisible by 2. 459// Leading 0x or +/- are not allowed. 460bool HexStringToBytes(const std::string& input, std::vector<uint8>* output); 461bool HexStringToBytes(const string16& input, std::vector<uint8>* output); 462 463// For floating-point conversions, only conversions of input strings in decimal 464// form are defined to work. Behavior with strings representing floating-point 465// numbers in hexadecimal, and strings representing non-fininte values (such as 466// NaN and inf) is undefined. Otherwise, these behave the same as the integral 467// variants. This expects the input string to NOT be specific to the locale. 468// If your input is locale specific, use ICU to read the number. 469bool StringToDouble(const std::string& input, double* output); 470bool StringToDouble(const string16& input, double* output); 471 472// Convenience forms of the above, when the caller is uninterested in the 473// boolean return value. These return only the |*output| value from the 474// above conversions: a best-effort conversion when possible, otherwise, 0. 475int StringToInt(const std::string& value); 476int StringToInt(const string16& value); 477int64 StringToInt64(const std::string& value); 478int64 StringToInt64(const string16& value); 479int HexStringToInt(const std::string& value); 480int HexStringToInt(const string16& value); 481double StringToDouble(const std::string& value); 482double StringToDouble(const string16& value); 483 484// Return a C++ string given printf-like input. 485std::string StringPrintf(const char* format, ...) PRINTF_FORMAT(1, 2); 486std::wstring StringPrintf(const wchar_t* format, ...) WPRINTF_FORMAT(1, 2); 487 488// Return a C++ string given vprintf-like input. 489std::string StringPrintV(const char* format, va_list ap) PRINTF_FORMAT(1, 0); 490 491// Store result into a supplied string and return it 492const std::string& SStringPrintf(std::string* dst, const char* format, ...) 493 PRINTF_FORMAT(2, 3); 494const std::wstring& SStringPrintf(std::wstring* dst, 495 const wchar_t* format, ...) 496 WPRINTF_FORMAT(2, 3); 497 498// Append result to a supplied string 499void StringAppendF(std::string* dst, const char* format, ...) 500 PRINTF_FORMAT(2, 3); 501void StringAppendF(std::wstring* dst, const wchar_t* format, ...) 502 WPRINTF_FORMAT(2, 3); 503 504// Lower-level routine that takes a va_list and appends to a specified 505// string. All other routines are just convenience wrappers around it. 506void StringAppendV(std::string* dst, const char* format, va_list ap) 507 PRINTF_FORMAT(2, 0); 508void StringAppendV(std::wstring* dst, const wchar_t* format, va_list ap) 509 WPRINTF_FORMAT(2, 0); 510 511// This is mpcomplete's pattern for saving a string copy when dealing with 512// a function that writes results into a wchar_t[] and wanting the result to 513// end up in a std::wstring. It ensures that the std::wstring's internal 514// buffer has enough room to store the characters to be written into it, and 515// sets its .length() attribute to the right value. 516// 517// The reserve() call allocates the memory required to hold the string 518// plus a terminating null. This is done because resize() isn't 519// guaranteed to reserve space for the null. The resize() call is 520// simply the only way to change the string's 'length' member. 521// 522// XXX-performance: the call to wide.resize() takes linear time, since it fills 523// the string's buffer with nulls. I call it to change the length of the 524// string (needed because writing directly to the buffer doesn't do this). 525// Perhaps there's a constant-time way to change the string's length. 526template <class string_type> 527inline typename string_type::value_type* WriteInto(string_type* str, 528 size_t length_with_null) { 529 str->reserve(length_with_null); 530 str->resize(length_with_null - 1); 531 return &((*str)[0]); 532} 533 534//----------------------------------------------------------------------------- 535 536// Function objects to aid in comparing/searching strings. 537 538template<typename Char> struct CaseInsensitiveCompare { 539 public: 540 bool operator()(Char x, Char y) const { 541 // TODO(darin): Do we really want to do locale sensitive comparisons here? 542 // See http://crbug.com/24917 543 return tolower(x) == tolower(y); 544 } 545}; 546 547template<typename Char> struct CaseInsensitiveCompareASCII { 548 public: 549 bool operator()(Char x, Char y) const { 550 return ToLowerASCII(x) == ToLowerASCII(y); 551 } 552}; 553 554// TODO(timsteele): Move these split string functions into their own API on 555// string_split.cc/.h files. 556//----------------------------------------------------------------------------- 557 558// Splits |str| into a vector of strings delimited by |s|. Append the results 559// into |r| as they appear. If several instances of |s| are contiguous, or if 560// |str| begins with or ends with |s|, then an empty string is inserted. 561// 562// Every substring is trimmed of any leading or trailing white space. 563void SplitString(const std::wstring& str, 564 wchar_t s, 565 std::vector<std::wstring>* r); 566void SplitString(const string16& str, 567 char16 s, 568 std::vector<string16>* r); 569void SplitString(const std::string& str, 570 char s, 571 std::vector<std::string>* r); 572 573// The same as SplitString, but don't trim white space. 574void SplitStringDontTrim(const std::wstring& str, 575 wchar_t s, 576 std::vector<std::wstring>* r); 577void SplitStringDontTrim(const string16& str, 578 char16 s, 579 std::vector<string16>* r); 580void SplitStringDontTrim(const std::string& str, 581 char s, 582 std::vector<std::string>* r); 583 584// The same as SplitString, but use a substring delimiter instead of a char. 585void SplitStringUsingSubstr(const string16& str, 586 const string16& s, 587 std::vector<string16>* r); 588void SplitStringUsingSubstr(const std::string& str, 589 const std::string& s, 590 std::vector<std::string>* r); 591 592// Splits a string into its fields delimited by any of the characters in 593// |delimiters|. Each field is added to the |tokens| vector. Returns the 594// number of tokens found. 595size_t Tokenize(const std::wstring& str, 596 const std::wstring& delimiters, 597 std::vector<std::wstring>* tokens); 598size_t Tokenize(const string16& str, 599 const string16& delimiters, 600 std::vector<string16>* tokens); 601size_t Tokenize(const std::string& str, 602 const std::string& delimiters, 603 std::vector<std::string>* tokens); 604size_t Tokenize(const base::StringPiece& str, 605 const base::StringPiece& delimiters, 606 std::vector<base::StringPiece>* tokens); 607 608// Does the opposite of SplitString(). 609std::wstring JoinString(const std::vector<std::wstring>& parts, wchar_t s); 610string16 JoinString(const std::vector<string16>& parts, char16 s); 611std::string JoinString(const std::vector<std::string>& parts, char s); 612 613// WARNING: this uses whitespace as defined by the HTML5 spec. If you need 614// a function similar to this but want to trim all types of whitespace, then 615// factor this out into a function that takes a string containing the characters 616// that are treated as whitespace. 617// 618// Splits the string along whitespace (where whitespace is the five space 619// characters defined by HTML 5). Each contiguous block of non-whitespace 620// characters is added to result. 621void SplitStringAlongWhitespace(const std::wstring& str, 622 std::vector<std::wstring>* result); 623void SplitStringAlongWhitespace(const string16& str, 624 std::vector<string16>* result); 625void SplitStringAlongWhitespace(const std::string& str, 626 std::vector<std::string>* result); 627 628// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. 629// Additionally, $$ is replaced by $. The offsets parameter here can 630// be NULL. This only allows you to use up to nine replacements. 631string16 ReplaceStringPlaceholders(const string16& format_string, 632 const std::vector<string16>& subst, 633 std::vector<size_t>* offsets); 634 635std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 636 const std::vector<std::string>& subst, 637 std::vector<size_t>* offsets); 638 639// Single-string shortcut for ReplaceStringHolders. 640string16 ReplaceStringPlaceholders(const string16& format_string, 641 const string16& a, 642 size_t* offset); 643 644// If the size of |input| is more than |max_len|, this function returns true and 645// |input| is shortened into |output| by removing chars in the middle (they are 646// replaced with up to 3 dots, as size permits). 647// Ex: ElideString(L"Hello", 10, &str) puts Hello in str and returns false. 648// ElideString(L"Hello my name is Tom", 10, &str) puts "Hell...Tom" in str and 649// returns true. 650bool ElideString(const std::wstring& input, int max_len, std::wstring* output); 651 652// Returns true if the string passed in matches the pattern. The pattern 653// string can contain wildcards like * and ? 654// The backslash character (\) is an escape character for * and ? 655// We limit the patterns to having a max of 16 * or ? characters. 656bool MatchPatternWide(const std::wstring& string, const std::wstring& pattern); 657bool MatchPatternASCII(const std::string& string, const std::string& pattern); 658 659// Returns a hex string representation of a binary buffer. 660// The returned hex string will be in upper case. 661// This function does not check if |size| is within reasonable limits since 662// it's written with trusted data in mind. 663// If you suspect that the data you want to format might be large, 664// the absolute max size for |size| should be is 665// std::numeric_limits<size_t>::max() / 2 666std::string HexEncode(const void* bytes, size_t size); 667 668// Hack to convert any char-like type to its unsigned counterpart. 669// For example, it will convert char, signed char and unsigned char to unsigned 670// char. 671template<typename T> 672struct ToUnsigned { 673 typedef T Unsigned; 674}; 675 676template<> 677struct ToUnsigned<char> { 678 typedef unsigned char Unsigned; 679}; 680template<> 681struct ToUnsigned<signed char> { 682 typedef unsigned char Unsigned; 683}; 684template<> 685struct ToUnsigned<wchar_t> { 686#if defined(WCHAR_T_IS_UTF16) 687 typedef unsigned short Unsigned; 688#elif defined(WCHAR_T_IS_UTF32) 689 typedef uint32 Unsigned; 690#endif 691}; 692template<> 693struct ToUnsigned<short> { 694 typedef unsigned short Unsigned; 695}; 696 697#endif // BASE_STRING_UTIL_H_ 698