1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <stdint.h> 8#include <limits> 9#include "base/macros.h" 10#include "base/strings/utf_string_conversion_utils.h" 11#include "base/third_party/icu/icu_utf.h" 12 13namespace base { 14 15namespace { 16 17typedef uintptr_t MachineWord; 18const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; 19 20inline bool IsAlignedToMachineWord(const void* pointer) { 21 return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask); 22} 23 24template<typename T> inline T* AlignToMachineWord(T* pointer) { 25 return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) & 26 ~kMachineWordAlignmentMask); 27} 28 29template<size_t size, typename CharacterType> struct NonASCIIMask; 30template<> struct NonASCIIMask<4, char> { 31 static inline uint32_t value() { return 0x80808080U; } 32}; 33template<> struct NonASCIIMask<8, char> { 34 static inline uint64_t value() { return 0x8080808080808080ULL; } 35}; 36 37} // namespace 38namespace { 39 40template<typename StringType> 41StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) { 42 StringType ret; 43 ret.reserve(str.size()); 44 for (size_t i = 0; i < str.size(); i++) 45 ret.push_back(ToLowerASCII(str[i])); 46 return ret; 47} 48 49template<typename StringType> 50StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) { 51 StringType ret; 52 ret.reserve(str.size()); 53 for (size_t i = 0; i < str.size(); i++) 54 ret.push_back(ToUpperASCII(str[i])); 55 return ret; 56} 57 58} // namespace 59 60std::string ToLowerASCII(StringPiece str) { 61 return ToLowerASCIIImpl<std::string>(str); 62} 63 64std::string ToUpperASCII(StringPiece str) { 65 return ToUpperASCIIImpl<std::string>(str); 66} 67 68template<class StringType> 69int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a, 70 BasicStringPiece<StringType> b) { 71 // Find the first characters that aren't equal and compare them. If the end 72 // of one of the strings is found before a nonequal character, the lengths 73 // of the strings are compared. 74 size_t i = 0; 75 while (i < a.length() && i < b.length()) { 76 typename StringType::value_type lower_a = ToLowerASCII(a[i]); 77 typename StringType::value_type lower_b = ToLowerASCII(b[i]); 78 if (lower_a < lower_b) 79 return -1; 80 if (lower_a > lower_b) 81 return 1; 82 i++; 83 } 84 85 // End of one string hit before finding a different character. Expect the 86 // common case to be "strings equal" at this point so check that first. 87 if (a.length() == b.length()) 88 return 0; 89 90 if (a.length() < b.length()) 91 return -1; 92 return 1; 93} 94 95int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) { 96 return CompareCaseInsensitiveASCIIT<std::string>(a, b); 97} 98 99bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { 100 if (a.length() != b.length()) 101 return false; 102 return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0; 103} 104 105template<typename STR> 106bool ReplaceCharsT(const STR& input, 107 const STR& replace_chars, 108 const STR& replace_with, 109 STR* output) { 110 bool removed = false; 111 size_t replace_length = replace_with.length(); 112 113 *output = input; 114 115 size_t found = output->find_first_of(replace_chars); 116 while (found != STR::npos) { 117 removed = true; 118 output->replace(found, 1, replace_with); 119 found = output->find_first_of(replace_chars, found + replace_length); 120 } 121 122 return removed; 123} 124 125bool ReplaceChars(const std::string& input, 126 const StringPiece& replace_chars, 127 const std::string& replace_with, 128 std::string* output) { 129 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 130} 131 132template<typename Str> 133TrimPositions TrimStringT(const Str& input, 134 BasicStringPiece<Str> trim_chars, 135 TrimPositions positions, 136 Str* output) { 137 // Find the edges of leading/trailing whitespace as desired. Need to use 138 // a StringPiece version of input to be able to call find* on it with the 139 // StringPiece version of trim_chars (normally the trim_chars will be a 140 // constant so avoid making a copy). 141 BasicStringPiece<Str> input_piece(input); 142 const size_t last_char = input.length() - 1; 143 const size_t first_good_char = (positions & TRIM_LEADING) ? 144 input_piece.find_first_not_of(trim_chars) : 0; 145 const size_t last_good_char = (positions & TRIM_TRAILING) ? 146 input_piece.find_last_not_of(trim_chars) : last_char; 147 148 // When the string was all trimmed, report that we stripped off characters 149 // from whichever position the caller was interested in. For empty input, we 150 // stripped no characters, but we still need to clear |output|. 151 if (input.empty() || 152 (first_good_char == Str::npos) || (last_good_char == Str::npos)) { 153 bool input_was_empty = input.empty(); // in case output == &input 154 output->clear(); 155 return input_was_empty ? TRIM_NONE : positions; 156 } 157 158 // Trim. 159 *output = 160 input.substr(first_good_char, last_good_char - first_good_char + 1); 161 162 // Return where we trimmed from. 163 return static_cast<TrimPositions>( 164 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 165 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 166} 167 168bool TrimString(const std::string& input, 169 StringPiece trim_chars, 170 std::string* output) { 171 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 172} 173 174template<typename Str> 175BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input, 176 BasicStringPiece<Str> trim_chars, 177 TrimPositions positions) { 178 size_t begin = (positions & TRIM_LEADING) ? 179 input.find_first_not_of(trim_chars) : 0; 180 size_t end = (positions & TRIM_TRAILING) ? 181 input.find_last_not_of(trim_chars) + 1 : input.size(); 182 return input.substr(begin, end - begin); 183} 184 185StringPiece TrimString(StringPiece input, 186 const StringPiece& trim_chars, 187 TrimPositions positions) { 188 return TrimStringPieceT(input, trim_chars, positions); 189} 190 191TrimPositions TrimWhitespaceASCII(const std::string& input, 192 TrimPositions positions, 193 std::string* output) { 194 return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output); 195} 196 197template <class Char> 198inline bool DoIsStringASCII(const Char* characters, size_t length) { 199 MachineWord all_char_bits = 0; 200 const Char* end = characters + length; 201 202 // Prologue: align the input. 203 while (!IsAlignedToMachineWord(characters) && characters != end) { 204 all_char_bits |= *characters; 205 ++characters; 206 } 207 208 // Compare the values of CPU word size. 209 const Char* word_end = AlignToMachineWord(end); 210 const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); 211 while (characters < word_end) { 212 all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters)); 213 characters += loop_increment; 214 } 215 216 // Process the remaining bytes. 217 while (characters != end) { 218 all_char_bits |= *characters; 219 ++characters; 220 } 221 222 MachineWord non_ascii_bit_mask = 223 NonASCIIMask<sizeof(MachineWord), Char>::value(); 224 return !(all_char_bits & non_ascii_bit_mask); 225} 226 227bool IsStringASCII(const StringPiece& str) { 228 return DoIsStringASCII(str.data(), str.length()); 229} 230 231bool IsStringUTF8(const StringPiece& str) { 232 const char *src = str.data(); 233 int32_t src_len = static_cast<int32_t>(str.length()); 234 int32_t char_index = 0; 235 236 while (char_index < src_len) { 237 int32_t code_point; 238 CBU8_NEXT(src, char_index, src_len, code_point); 239 if (!IsValidCharacter(code_point)) 240 return false; 241 } 242 return true; 243} 244 245} // namespace base 246