// string_util.h revision b910a63ff3111067e79c016f40a7c1baac943405
1b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Copyright 2013 The Chromium Authors. All rights reserved. 2b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Use of this source code is governed by a BSD-style license that can be 3b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// found in the LICENSE file. 4b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 5b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// This file defines utility functions for working with strings. 6b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 7b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#ifndef BASE_STRINGS_STRING_UTIL_H_ 8b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#define BASE_STRINGS_STRING_UTIL_H_ 9b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 10b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include <ctype.h> 11b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include <stdarg.h> // va_list 12b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 13b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include <string> 14b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include <vector> 15b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 16b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/base_export.h" 17b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/basictypes.h" 18b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/compiler_specific.h" 19b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string16.h" 20b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string_piece.h" // For implicit conversions. 21b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 22b910a63ff3111067e79c016f40a7c1baac943405Daniel Erat// On Android, bionic's stdio.h defines an snprintf macro when being built with 23b910a63ff3111067e79c016f40a7c1baac943405Daniel Erat// clang. Undefine it here so it won't collide with base::snprintf(). 
24b910a63ff3111067e79c016f40a7c1baac943405Daniel Erat#undef snprintf 25b910a63ff3111067e79c016f40a7c1baac943405Daniel Erat 26b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratnamespace base { 27b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 28b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// C standard-library functions like "strncasecmp" and "snprintf" that aren't 29b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// cross-platform are provided as "base::strncasecmp", and their prototypes 30b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// are listed below. These functions are then implemented as inline calls 31b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// to the platform-specific equivalents in the platform-specific headers. 32b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 33b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Compares the two strings s1 and s2 without regard to case using 34b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 35b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// s2 > s1 according to a lexicographic comparison. 36b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratint strcasecmp(const char* s1, const char* s2); 37b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 38b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Compares up to count characters of s1 and s2 without regard to case using 39b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if 40b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// s2 > s1 according to a lexicographic comparison. 41b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratint strncasecmp(const char* s1, const char* s2, size_t count); 42b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 43b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Same as strncmp but for char16 strings. 
44b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratint strncmp16(const char16* s1, const char16* s2, size_t count); 45b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 46b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Wrapper for vsnprintf that always null-terminates and always returns the 47b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// number of characters that would be in an untruncated formatted 48b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// string, even when truncation occurs. 49b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratint vsnprintf(char* buffer, size_t size, const char* format, va_list arguments) 50b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat PRINTF_FORMAT(3, 0); 51b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 52b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Some of these implementations need to be inlined. 53b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 54b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// We separate the declaration from the implementation of this inline 55b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// function just so the PRINTF_FORMAT works. 56b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline int snprintf(char* buffer, size_t size, const char* format, ...) 57b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat PRINTF_FORMAT(3, 4); 58b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline int snprintf(char* buffer, size_t size, const char* format, ...) 
{ 59b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat va_list arguments; 60b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat va_start(arguments, format); 61b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat int result = vsnprintf(buffer, size, format, arguments); 62b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat va_end(arguments); 63b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return result; 64b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 65b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 66b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// BSD-style safe and consistent string copy functions. 67b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|. 68b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as 69b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// long as |dst_size| is not 0. Returns the length of |src| in characters. 70b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// If the return value is >= dst_size, then the output was truncated. 71b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// NOTE: All sizes are in number of characters, NOT in bytes. 72b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT size_t strlcpy(char* dst, const char* src, size_t dst_size); 73b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size); 74b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 75b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Scan a wprintf format string to determine whether it's portable across a 76b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// variety of systems. 
// This function only checks that the conversion specifiers used by the format
// string are supported and have the same meaning on a variety of systems. It
// doesn't check for other errors that might occur within a format string.
//
// Nonportable conversion specifiers for wprintf are:
//  - 's' and 'c' without an 'l' length modifier. %s and %c operate on char
//    data on all systems except Windows, which treats them as wchar_t data.
//    Use %ls and %lc for wchar_t data instead.
//  - 'S' and 'C', which operate on wchar_t data on all systems except Windows,
//    which treats them as char data. Use %ls and %lc for wchar_t data
//    instead.
//  - 'F', which is not identified by Windows wprintf documentation.
//  - 'D', 'O', and 'U', which are deprecated and not available on all systems.
//    Use %ld, %lo, and %lu instead.
//
// Note that there is no portable conversion specifier for char data when
// working with wprintf.
//
// This function is intended to be called from base::vswprintf.
96b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsWprintfFormatPortable(const wchar_t* format); 97b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 98b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// ASCII-specific tolower. The standard library's tolower is locale sensitive, 99b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// so we don't want to use it here. 100b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class Char> inline Char ToLowerASCII(Char c) { 101b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 102b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 103b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 104b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// ASCII-specific toupper. The standard library's toupper is locale sensitive, 105b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// so we don't want to use it here. 106b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class Char> inline Char ToUpperASCII(Char c) { 107b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return (c >= 'a' && c <= 'z') ? (c + ('A' - 'a')) : c; 108b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 109b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 110b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Function objects to aid in comparing/searching strings. 111b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 112b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate<typename Char> struct CaseInsensitiveCompare { 113b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat public: 114b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool operator()(Char x, Char y) const { 115b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // TODO(darin): Do we really want to do locale sensitive comparisons here? 
116b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // See http://crbug.com/24917 117b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return tolower(x) == tolower(y); 118b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 119b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}; 120b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 121b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate<typename Char> struct CaseInsensitiveCompareASCII { 122b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat public: 123b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool operator()(Char x, Char y) const { 124b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return ToLowerASCII(x) == ToLowerASCII(y); 125b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 126b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}; 127b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 128b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// These threadsafe functions return references to globally unique empty 129b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// strings. 130b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 131b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// It is likely faster to construct a new empty string object (just a few 132b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// instructions to set the length to 0) than to get the empty string singleton 133b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// returned by these functions (which requires threadsafe singleton access). 134b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 135b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Therefore, DO NOT USE THESE AS A GENERAL-PURPOSE SUBSTITUTE FOR DEFAULT 136b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// CONSTRUCTORS. There is only one case where you should use these: functions 137b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// which need to return a string by reference (e.g. 
as a class member 138b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// accessor), and don't have an empty string to use (e.g. in an error case). 139b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// These should not be used as initializers, function arguments, or return 140b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// values for functions which return by value or outparam. 141b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT const std::string& EmptyString(); 142b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT const string16& EmptyString16(); 143b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 144b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Contains the set of characters representing whitespace in the corresponding 145b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// encoding. Null-terminated. The ASCII versions are the whitespaces as defined 146b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// by HTML5, and don't include control characters. 147b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT extern const wchar_t kWhitespaceWide[]; // Includes Unicode. 148b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT extern const char16 kWhitespaceUTF16[]; // Includes Unicode. 149b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT extern const char kWhitespaceASCII[]; 150b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT extern const char16 kWhitespaceASCIIAs16[]; // No unicode. 151b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 152b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Null-terminated string representing the UTF-8 byte order mark. 153b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT extern const char kUtf8ByteOrderMark[]; 154b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 155b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Removes characters in |remove_chars| from anywhere in |input|. 
Returns true 156b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// if any characters were removed. |remove_chars| must be null-terminated. 157b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// NOTE: Safe to use the same variable for both |input| and |output|. 158b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool RemoveChars(const string16& input, 159b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece16& remove_chars, 160b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16* output); 161b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool RemoveChars(const std::string& input, 162b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& remove_chars, 163b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 164b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 165b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Replaces characters in |replace_chars| from anywhere in |input| with 166b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// |replace_with|. Each character in |replace_chars| will be replaced with 167b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the |replace_with| string. Returns true if any characters were replaced. 168b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// |replace_chars| must be null-terminated. 169b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// NOTE: Safe to use the same variable for both |input| and |output|. 
170b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool ReplaceChars(const string16& input, 171b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece16& replace_chars, 172b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const string16& replace_with, 173b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16* output); 174b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool ReplaceChars(const std::string& input, 175b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& replace_chars, 176b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& replace_with, 177b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 178b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 179b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratenum TrimPositions { 180b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TRIM_NONE = 0, 181b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TRIM_LEADING = 1 << 0, 182b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TRIM_TRAILING = 1 << 1, 183b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TRIM_ALL = TRIM_LEADING | TRIM_TRAILING, 184b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}; 185b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 186b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Removes characters in |trim_chars| from the beginning and end of |input|. 187b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// The 8-bit version only works on 8-bit characters, not UTF-8. 188b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 189b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// It is safe to use the same variable for both |input| and |output| (this is 190b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the normal usage to trim in-place). 
191b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool TrimString(const string16& input, 192b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::StringPiece16 trim_chars, 193b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16* output); 194b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool TrimString(const std::string& input, 195b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::StringPiece trim_chars, 196b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 197b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 198b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// StringPiece versions of the above. The returned pieces refer to the original 199b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// buffer. 200b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT StringPiece16 TrimString(StringPiece16 input, 201b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece16& trim_chars, 202b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TrimPositions positions); 203b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT StringPiece TrimString(StringPiece input, 204b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& trim_chars, 205b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TrimPositions positions); 206b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 207b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Truncates a string to the nearest UTF-8 character that will leave 208b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the string less than or equal to the specified byte size. 
209b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT void TruncateUTF8ToByteSize(const std::string& input, 210b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const size_t byte_size, 211b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 212b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 213b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Trims any whitespace from either end of the input string. Returns where 214b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// whitespace was found. 215b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// The non-wide version has two functions: 216b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// * TrimWhitespaceASCII() 217b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// This function is for ASCII strings and only looks for ASCII whitespace; 218b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Please choose the best one according to your usage. 219b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// NOTE: Safe to use the same variable for both input and output. 220b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT TrimPositions TrimWhitespace(const string16& input, 221b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TrimPositions positions, 222b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::string16* output); 223b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT TrimPositions TrimWhitespaceASCII(const std::string& input, 224b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TrimPositions positions, 225b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 226b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 227b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Deprecated. This function is only for backward compatibility and calls 228b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// TrimWhitespaceASCII(). 
229b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT TrimPositions TrimWhitespace(const std::string& input, 230b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat TrimPositions positions, 231b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* output); 232b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 233b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Searches for CR or LF characters. Removes all contiguous whitespace 234b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// strings that contain them. This is useful when trying to deal with text 235b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// copied from terminals. 236b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns |text|, with the following three transformations: 237b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// (1) Leading and trailing whitespace is trimmed. 238b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// (2) If |trim_sequences_with_line_breaks| is true, any other whitespace 239b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// sequences containing a CR or LF are trimmed. 240b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// (3) All other whitespace sequences are converted to single spaces. 
241b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT string16 CollapseWhitespace( 242b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const string16& text, 243b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool trim_sequences_with_line_breaks); 244b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT std::string CollapseWhitespaceASCII( 245b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& text, 246b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool trim_sequences_with_line_breaks); 247b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 248b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if |input| is empty or contains only characters found in 249b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// |characters|. 250b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool ContainsOnlyChars(const StringPiece& input, 251b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const StringPiece& characters); 252b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool ContainsOnlyChars(const StringPiece16& input, 253b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const StringPiece16& characters); 254b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 255b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if the specified string matches the criteria. How can a wide 256b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// string be 8-bit or UTF8? It contains only characters that are < 256 (in the 257b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// first case) or characters that use only 8-bits and whose 8-bit 258b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// representation looks like a UTF-8 string (the second case). 
259b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 260b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Note that IsStringUTF8 checks not only if the input is structurally 261b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// valid but also if it doesn't contain any non-character codepoint 262b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// (e.g. U+FFFE). It's done on purpose because all the existing callers want 263b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// to have the maximum 'discriminating' power from other encodings. If 264b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// there's a use case for just checking the structural validity, we have to 265b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// add a new function for that. 266b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 267b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// IsStringASCII assumes the input is likely all ASCII, and does not leave early 268b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// if it is not the case. 269b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsStringUTF8(const StringPiece& str); 270b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsStringASCII(const StringPiece& str); 271b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsStringASCII(const StringPiece16& str); 272b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// A convenience adaptor for WebStrings, as they don't convert into 273b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// StringPieces directly. 
274b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsStringASCII(const string16& str); 275b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#if defined(WCHAR_T_IS_UTF32) 276b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool IsStringASCII(const std::wstring& str); 277b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#endif 278b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 279b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Converts the elements of the given string. This version uses a pointer to 280b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// clearly differentiate it from the non-pointer variant. 281b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class str> inline void StringToLowerASCII(str* s) { 282b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat for (typename str::iterator i = s->begin(); i != s->end(); ++i) 283b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat *i = ToLowerASCII(*i); 284b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 285b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 286b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class str> inline str StringToLowerASCII(const str& s) { 287b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // for std::string and std::wstring 288b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat str output(s); 289b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat StringToLowerASCII(&output); 290b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return output; 291b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 292b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 293b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Converts the elements of the given string. This version uses a pointer to 294b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// clearly differentiate it from the non-pointer variant. 
295b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class str> inline void StringToUpperASCII(str* s) { 296b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat for (typename str::iterator i = s->begin(); i != s->end(); ++i) 297b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat *i = ToUpperASCII(*i); 298b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 299b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 300b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class str> inline str StringToUpperASCII(const str& s) { 301b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // for std::string and std::wstring 302b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat str output(s); 303b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat StringToUpperASCII(&output); 304b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return output; 305b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 306b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 307b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Compare the lower-case form of the given string against the given ASCII 308b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// string. This is useful for doing checking if an input string matches some 309b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// token, and it is optimized to avoid intermediate string copies. This API is 310b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// borrowed from the equivalent APIs in Mozilla. 311b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(const std::string& a, const char* b); 312b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(const string16& a, const char* b); 313b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 314b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Same thing, but with string iterators instead. 
315b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 316b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string::const_iterator a_end, 317b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b); 318b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 319b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16::const_iterator a_end, 320b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b); 321b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 322b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* a_end, 323b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b); 324b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(const char* a_begin, 325b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* a_end, 326b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b_begin, 327b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b_end); 328b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool LowerCaseEqualsASCII(const char16* a_begin, 329b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char16* a_end, 330b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* b); 331b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 332b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Performs a case-sensitive string compare. The behavior is undefined if both 333b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// strings are not ASCII. 334b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool EqualsASCII(const string16& a, const StringPiece& b); 335b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 336b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if str starts with search, or false otherwise. 
337b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// TODO(brettw) the case sensitive flag makes callsites difficult to read. 338b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Consider splitting this out in two variants (few callers want 339b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// case-insensitive compares) or use an enum that makes this more explicit. 340b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool StartsWithASCII(const std::string& str, 341b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& search, 342b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool case_sensitive); 343b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool StartsWith(const base::string16& str, 344b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& search, 345b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool case_sensitive); 346b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 347b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if str ends with search, or false otherwise. 348b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// TODO(brettw) case sensitive flag confusion, see StartsWith above. 
349b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool EndsWith(const std::string& str, 350b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& search, 351b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool case_sensitive); 352b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool EndsWith(const base::string16& str, 353b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& search, 354b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool case_sensitive); 355b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 356b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} // namespace base 357b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 358b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#if defined(OS_WIN) 359b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string_util_win.h" 360b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#elif defined(OS_POSIX) 361b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string_util_posix.h" 362b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#else 363b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#error Define string operations appropriately for your platform 364b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#endif 365b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 366b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Determines the type of ASCII character, independent of locale (the C 367b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// library versions will change based on locale). 
368b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <typename Char> 369b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline bool IsAsciiWhitespace(Char c) { 370b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return c == ' ' || c == '\r' || c == '\n' || c == '\t'; 371b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 372b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <typename Char> 373b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline bool IsAsciiAlpha(Char c) { 374b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')); 375b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 376b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <typename Char> 377b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline bool IsAsciiDigit(Char c) { 378b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return c >= '0' && c <= '9'; 379b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 380b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 381b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <typename Char> 382b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline bool IsHexDigit(Char c) { 383b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return (c >= '0' && c <= '9') || 384b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat (c >= 'A' && c <= 'F') || 385b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat (c >= 'a' && c <= 'f'); 386b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 387b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 388b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <typename Char> 389b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline char HexDigitToInt(Char c) { 390b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat DCHECK(IsHexDigit(c)); 391b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (c >= '0' && c <= '9') 392b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return static_cast<char>(c - '0'); 
393b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (c >= 'A' && c <= 'F') 394b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return static_cast<char>(c - 'A' + 10); 395b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (c >= 'a' && c <= 'f') 396b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return static_cast<char>(c - 'a' + 10); 397b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return 0; 398b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 399b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 400b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if it's a whitespace character. 401b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline bool IsWhitespace(wchar_t c) { 402b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return wcschr(base::kWhitespaceWide, c) != NULL; 403b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 404b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 405b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Return a byte string in human-readable format with a unit suffix. Not 406b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// appropriate for use in any UI; use of FormatBytes and friends in ui/base is 407b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// highly recommended instead. TODO(avi): Figure out how to get callers to use 408b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// FormatBytes instead; remove this. 409b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT base::string16 FormatBytesUnlocalized(int64 bytes); 410b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 411b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Starting at |start_offset| (usually 0), replace the first instance of 412b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// |find_this| with |replace_with|. 
413b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT void ReplaceFirstSubstringAfterOffset( 414b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::string16* str, 415b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t start_offset, 416b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& find_this, 417b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& replace_with); 418b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT void ReplaceFirstSubstringAfterOffset( 419b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string* str, 420b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t start_offset, 421b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& find_this, 422b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& replace_with); 423b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 424b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Starting at |start_offset| (usually 0), look through |str| and replace all 425b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// instances of |find_this| with |replace_with|. 
426b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 427b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// This does entire substrings; use std::replace in <algorithm> for single 428b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// characters, for example: 429b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// std::replace(str.begin(), str.end(), 'a', 'b'); 430b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT void ReplaceSubstringsAfterOffset( 431b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::string16* str, 432b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t start_offset, 433b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& find_this, 434b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& replace_with); 435b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT void ReplaceSubstringsAfterOffset(std::string* str, 436b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t start_offset, 437b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& find_this, 438b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& replace_with); 439b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 440b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Reserves enough memory in |str| to accommodate |length_with_null| characters, 441b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// sets the size of |str| to |length_with_null - 1| characters, and returns a 442b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// pointer to the underlying contiguous array of characters. This is typically 443b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// used when calling a function that writes results into a character array, but 444b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// the caller wants the data to be managed by a string-like object. 
// It is convenient in that it can be used inline in the call, and fast in that
// it avoids copying the results of the call from a char* into a string.
//
// |length_with_null| must be at least 2, since otherwise the underlying string
// would have size 0, and trying to access &((*str)[0]) in that case can result
// in a number of problems.
//
// Internally, this takes linear time because the resize() call 0-fills the
// underlying array for potentially all
// (|length_with_null - 1| * sizeof(string_type::value_type)) bytes. Ideally we
// could avoid this aspect of the resize() call, as we expect the caller to
// immediately write over this memory, but there is no other way to set the size
// of the string, and not doing that will mean people who access |str| rather
// than str.c_str() will get back a string of whatever size |str| had on entry
// to this function (probably 0).
460b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erattemplate <class string_type> 461b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratinline typename string_type::value_type* WriteInto(string_type* str, 462b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t length_with_null) { 463b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat DCHECK_GT(length_with_null, 1u); 464b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat str->reserve(length_with_null); 465b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat str->resize(length_with_null - 1); 466b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat return &((*str)[0]); 467b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 468b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 469b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat//----------------------------------------------------------------------------- 470b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 471b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Splits a string into its fields delimited by any of the characters in 472b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// |delimiters|. Each field is added to the |tokens| vector. Returns the 473b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// number of tokens found. 474b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// 475b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// DEPRECATED. Use SplitStringUsingSet for new code (these just forward). 476b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// TODO(brettw) convert callers and delete these forwarders. 
477b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT size_t Tokenize(const base::string16& str, 478b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& delimiters, 479b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::vector<base::string16>* tokens); 480b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT size_t Tokenize(const std::string& str, 481b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& delimiters, 482b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::vector<std::string>* tokens); 483b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT size_t Tokenize(const base::StringPiece& str, 484b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& delimiters, 485b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::vector<base::StringPiece>* tokens); 486b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 487b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Does the opposite of SplitString(). 488b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT base::string16 JoinString(const std::vector<base::string16>& parts, 489b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat base::char16 s); 490b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT std::string JoinString( 491b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::vector<std::string>& parts, char s); 492b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 493b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Join |parts| using |separator|. 
494b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT std::string JoinString( 495b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::vector<std::string>& parts, 496b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::string& separator); 497b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT base::string16 JoinString( 498b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::vector<base::string16>& parts, 499b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& separator); 500b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 501b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Replace $1-$2-$3..$9 in the format string with |a|-|b|-|c|..|i| respectively. 502b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Additionally, any number of consecutive '$' characters is replaced by that 503b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// number less one. Eg $$->$, $$$->$$, etc. The offsets parameter here can be 504b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// NULL. This only allows you to use up to nine replacements. 
505b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT base::string16 ReplaceStringPlaceholders( 506b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& format_string, 507b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::vector<base::string16>& subst, 508b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::vector<size_t>* offsets); 509b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 510b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT std::string ReplaceStringPlaceholders( 511b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& format_string, 512b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const std::vector<std::string>& subst, 513b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::vector<size_t>* offsets); 514b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 515b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Single-string shortcut for ReplaceStringHolders. |offset| may be NULL. 516b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT base::string16 ReplaceStringPlaceholders( 517b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& format_string, 518b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& a, 519b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat size_t* offset); 520b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 521b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Returns true if the string passed in matches the pattern. The pattern 522b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// string can contain wildcards like * and ? 523b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// The backslash character (\) is an escape character for * and ? 524b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// We limit the patterns to having a max of 16 * or ? characters. 525b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// ? matches 0 or 1 character, while * matches 0 or more characters. 
526b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool MatchPattern(const base::StringPiece& string, 527b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::StringPiece& pattern); 528b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratBASE_EXPORT bool MatchPattern(const base::string16& string, 529b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const base::string16& pattern); 530b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 531b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#endif // BASE_STRINGS_STRING_UTIL_H_ 532