string_util.cc revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <ctype.h> 8#include <errno.h> 9#include <math.h> 10#include <stdarg.h> 11#include <stdio.h> 12#include <stdlib.h> 13#include <string.h> 14#include <time.h> 15#include <wchar.h> 16#include <wctype.h> 17 18#include <algorithm> 19#include <vector> 20 21#include "base/basictypes.h" 22#include "base/logging.h" 23#include "base/memory/singleton.h" 24#include "base/strings/utf_string_conversion_utils.h" 25#include "base/strings/utf_string_conversions.h" 26#include "base/third_party/icu/icu_utf.h" 27#include "build/build_config.h" 28 29// Remove when this entire file is in the base namespace. 30using base::char16; 31using base::string16; 32 33namespace { 34 35// Force the singleton used by Empty[W]String[16] to be a unique type. This 36// prevents other code that might accidentally use Singleton<string> from 37// getting our internal one. 38struct EmptyStrings { 39 EmptyStrings() {} 40 const std::string s; 41 const std::wstring ws; 42 const string16 s16; 43 44 static EmptyStrings* GetInstance() { 45 return Singleton<EmptyStrings>::get(); 46 } 47}; 48 49// Used by ReplaceStringPlaceholders to track the position in the string of 50// replaced parameters. 51struct ReplacementOffset { 52 ReplacementOffset(uintptr_t parameter, size_t offset) 53 : parameter(parameter), 54 offset(offset) {} 55 56 // Index of the parameter. 57 uintptr_t parameter; 58 59 // Starting position in the string. 60 size_t offset; 61}; 62 63static bool CompareParameter(const ReplacementOffset& elem1, 64 const ReplacementOffset& elem2) { 65 return elem1.parameter < elem2.parameter; 66} 67 68} // namespace 69 70namespace base { 71 72bool IsWprintfFormatPortable(const wchar_t* format) { 73 for (const wchar_t* position = format; *position != '\0'; ++position) { 74 if (*position == '%') { 75 bool in_specification = true; 76 bool modifier_l = false; 77 while (in_specification) { 78 // Eat up characters until reaching a known specifier. 79 if (*++position == '\0') { 80 // The format string ended in the middle of a specification. Call 81 // it portable because no unportable specifications were found. The 82 // string is equally broken on all platforms. 83 return true; 84 } 85 86 if (*position == 'l') { 87 // 'l' is the only thing that can save the 's' and 'c' specifiers. 88 modifier_l = true; 89 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 90 *position == 'S' || *position == 'C' || *position == 'F' || 91 *position == 'D' || *position == 'O' || *position == 'U') { 92 // Not portable. 93 return false; 94 } 95 96 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 97 // Portable, keep scanning the rest of the format string. 98 in_specification = false; 99 } 100 } 101 } 102 } 103 104 return true; 105} 106 107const std::string& EmptyString() { 108 return EmptyStrings::GetInstance()->s; 109} 110 111const std::wstring& EmptyWString() { 112 return EmptyStrings::GetInstance()->ws; 113} 114 115const string16& EmptyString16() { 116 return EmptyStrings::GetInstance()->s16; 117} 118 119template<typename STR> 120bool ReplaceCharsT(const STR& input, 121 const typename STR::value_type replace_chars[], 122 const STR& replace_with, 123 STR* output) { 124 bool removed = false; 125 size_t replace_length = replace_with.length(); 126 127 *output = input; 128 129 size_t found = output->find_first_of(replace_chars); 130 while (found != STR::npos) { 131 removed = true; 132 output->replace(found, 1, replace_with); 133 found = output->find_first_of(replace_chars, found + replace_length); 134 } 135 136 return removed; 137} 138 139bool ReplaceChars(const string16& input, 140 const char16 replace_chars[], 141 const string16& replace_with, 142 string16* output) { 143 return ReplaceCharsT(input, replace_chars, replace_with, output); 144} 145 146bool ReplaceChars(const std::string& input, 147 const char replace_chars[], 148 const std::string& replace_with, 149 std::string* output) { 150 return ReplaceCharsT(input, replace_chars, replace_with, output); 151} 152 153bool RemoveChars(const string16& input, 154 const char16 remove_chars[], 155 string16* output) { 156 return ReplaceChars(input, remove_chars, string16(), output); 157} 158 159bool RemoveChars(const std::string& input, 160 const char remove_chars[], 161 std::string* output) { 162 return ReplaceChars(input, remove_chars, std::string(), output); 163} 164 165template<typename STR> 166TrimPositions TrimStringT(const STR& input, 167 const typename STR::value_type trim_chars[], 168 TrimPositions positions, 169 STR* output) { 170 // Find the edges of leading/trailing whitespace as desired. 171 const typename STR::size_type last_char = input.length() - 1; 172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 173 input.find_first_not_of(trim_chars) : 0; 174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 175 input.find_last_not_of(trim_chars) : last_char; 176 177 // When the string was all whitespace, report that we stripped off whitespace 178 // from whichever position the caller was interested in. For empty input, we 179 // stripped no whitespace, but we still need to clear |output|. 180 if (input.empty() || 181 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 182 bool input_was_empty = input.empty(); // in case output == &input 183 output->clear(); 184 return input_was_empty ? TRIM_NONE : positions; 185 } 186 187 // Trim the whitespace. 188 *output = 189 input.substr(first_good_char, last_good_char - first_good_char + 1); 190 191 // Return where we trimmed from. 192 return static_cast<TrimPositions>( 193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 195} 196 197bool TrimString(const string16& input, 198 const char16 trim_chars[], 199 string16* output) { 200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 201} 202 203bool TrimString(const std::string& input, 204 const char trim_chars[], 205 std::string* output) { 206 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 207} 208 209void TruncateUTF8ToByteSize(const std::string& input, 210 const size_t byte_size, 211 std::string* output) { 212 DCHECK(output); 213 if (byte_size > input.length()) { 214 *output = input; 215 return; 216 } 217 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 218 // Note: This cast is necessary because CBU8_NEXT uses int32s. 219 int32 truncation_length = static_cast<int32>(byte_size); 220 int32 char_index = truncation_length - 1; 221 const char* data = input.data(); 222 223 // Using CBU8, we will move backwards from the truncation point 224 // to the beginning of the string looking for a valid UTF8 225 // character. Once a full UTF8 character is found, we will 226 // truncate the string to the end of that character. 227 while (char_index >= 0) { 228 int32 prev = char_index; 229 uint32 code_point = 0; 230 CBU8_NEXT(data, char_index, truncation_length, code_point); 231 if (!IsValidCharacter(code_point) || 232 !IsValidCodepoint(code_point)) { 233 char_index = prev - 1; 234 } else { 235 break; 236 } 237 } 238 239 if (char_index >= 0 ) 240 *output = input.substr(0, char_index); 241 else 242 output->clear(); 243} 244 245} // namespace base 246 247TrimPositions TrimWhitespace(const base::string16& input, 248 TrimPositions positions, 249 base::string16* output) { 250 return base::TrimStringT(input, base::kWhitespaceUTF16, positions, output); 251} 252 253TrimPositions TrimWhitespaceASCII(const std::string& input, 254 TrimPositions positions, 255 std::string* output) { 256 return base::TrimStringT(input, base::kWhitespaceASCII, positions, output); 257} 258 259// This function is only for backward-compatibility. 260// To be removed when all callers are updated. 261TrimPositions TrimWhitespace(const std::string& input, 262 TrimPositions positions, 263 std::string* output) { 264 return TrimWhitespaceASCII(input, positions, output); 265} 266 267template<typename STR> 268STR CollapseWhitespaceT(const STR& text, 269 bool trim_sequences_with_line_breaks) { 270 STR result; 271 result.resize(text.size()); 272 273 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 274 // will trim any leading whitespace. 275 bool in_whitespace = true; 276 bool already_trimmed = true; 277 278 int chars_written = 0; 279 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 280 if (IsWhitespace(*i)) { 281 if (!in_whitespace) { 282 // Reduce all whitespace sequences to a single space. 283 in_whitespace = true; 284 result[chars_written++] = L' '; 285 } 286 if (trim_sequences_with_line_breaks && !already_trimmed && 287 ((*i == '\n') || (*i == '\r'))) { 288 // Whitespace sequences containing CR or LF are eliminated entirely. 289 already_trimmed = true; 290 --chars_written; 291 } 292 } else { 293 // Non-whitespace chracters are copied straight across. 294 in_whitespace = false; 295 already_trimmed = false; 296 result[chars_written++] = *i; 297 } 298 } 299 300 if (in_whitespace && !already_trimmed) { 301 // Any trailing whitespace is eliminated. 302 --chars_written; 303 } 304 305 result.resize(chars_written); 306 return result; 307} 308 309string16 CollapseWhitespace(const string16& text, 310 bool trim_sequences_with_line_breaks) { 311 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 312} 313 314std::string CollapseWhitespaceASCII(const std::string& text, 315 bool trim_sequences_with_line_breaks) { 316 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 317} 318 319bool ContainsOnlyWhitespaceASCII(const std::string& str) { 320 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { 321 if (!IsAsciiWhitespace(*i)) 322 return false; 323 } 324 return true; 325} 326 327bool ContainsOnlyWhitespace(const base::string16& str) { 328 return str.find_first_not_of(base::kWhitespaceUTF16) == string16::npos; 329} 330 331template<typename STR> 332static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { 333 for (typename STR::const_iterator iter = input.begin(); 334 iter != input.end(); ++iter) { 335 if (characters.find(*iter) == STR::npos) 336 return false; 337 } 338 return true; 339} 340 341bool ContainsOnlyChars(const string16& input, const string16& characters) { 342 return ContainsOnlyCharsT(input, characters); 343} 344 345bool ContainsOnlyChars(const std::string& input, 346 const std::string& characters) { 347 return ContainsOnlyCharsT(input, characters); 348} 349 350#if !defined(WCHAR_T_IS_UTF16) 351bool IsStringASCII(const std::wstring& str); 352#endif 353 354std::string WideToASCII(const std::wstring& wide) { 355 DCHECK(IsStringASCII(wide)) << wide; 356 return std::string(wide.begin(), wide.end()); 357} 358 359std::string UTF16ToASCII(const string16& utf16) { 360 DCHECK(IsStringASCII(utf16)) << utf16; 361 return std::string(utf16.begin(), utf16.end()); 362} 363 364template<class STR> 365static bool DoIsStringASCII(const STR& str) { 366 for (size_t i = 0; i < str.length(); i++) { 367 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 368 if (c > 0x7F) 369 return false; 370 } 371 return true; 372} 373 374#if !defined(WCHAR_T_IS_UTF16) 375bool IsStringASCII(const std::wstring& str) { 376 return DoIsStringASCII(str); 377} 378#endif 379 380bool IsStringASCII(const string16& str) { 381 return DoIsStringASCII(str); 382} 383 384bool IsStringASCII(const base::StringPiece& str) { 385 return DoIsStringASCII(str); 386} 387 388bool IsStringUTF8(const std::string& str) { 389 const char *src = str.data(); 390 int32 src_len = static_cast<int32>(str.length()); 391 int32 char_index = 0; 392 393 while (char_index < src_len) { 394 int32 code_point; 395 CBU8_NEXT(src, char_index, src_len, code_point); 396 if (!base::IsValidCharacter(code_point)) 397 return false; 398 } 399 return true; 400} 401 402template<typename Iter> 403static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 404 Iter a_end, 405 const char* b) { 406 for (Iter it = a_begin; it != a_end; ++it, ++b) { 407 if (!*b || base::ToLowerASCII(*it) != *b) 408 return false; 409 } 410 return *b == 0; 411} 412 413// Front-ends for LowerCaseEqualsASCII. 414bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 415 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 416} 417 418bool LowerCaseEqualsASCII(const string16& a, const char* b) { 419 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 420} 421 422bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 423 std::string::const_iterator a_end, 424 const char* b) { 425 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 426} 427 428bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 429 string16::const_iterator a_end, 430 const char* b) { 431 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 432} 433 434// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 435#if !defined(OS_ANDROID) 436bool LowerCaseEqualsASCII(const char* a_begin, 437 const char* a_end, 438 const char* b) { 439 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 440} 441 442bool LowerCaseEqualsASCII(const char16* a_begin, 443 const char16* a_end, 444 const char* b) { 445 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 446} 447 448#endif // !defined(OS_ANDROID) 449 450bool EqualsASCII(const string16& a, const base::StringPiece& b) { 451 if (a.length() != b.length()) 452 return false; 453 return std::equal(b.begin(), b.end(), a.begin()); 454} 455 456bool StartsWithASCII(const std::string& str, 457 const std::string& search, 458 bool case_sensitive) { 459 if (case_sensitive) 460 return str.compare(0, search.length(), search) == 0; 461 else 462 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 463} 464 465template <typename STR> 466bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 467 if (case_sensitive) { 468 return str.compare(0, search.length(), search) == 0; 469 } else { 470 if (search.size() > str.size()) 471 return false; 472 return std::equal(search.begin(), search.end(), str.begin(), 473 base::CaseInsensitiveCompare<typename STR::value_type>()); 474 } 475} 476 477bool StartsWith(const string16& str, const string16& search, 478 bool case_sensitive) { 479 return StartsWithT(str, search, case_sensitive); 480} 481 482template <typename STR> 483bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 484 typename STR::size_type str_length = str.length(); 485 typename STR::size_type search_length = search.length(); 486 if (search_length > str_length) 487 return false; 488 if (case_sensitive) { 489 return str.compare(str_length - search_length, search_length, search) == 0; 490 } else { 491 return std::equal(search.begin(), search.end(), 492 str.begin() + (str_length - search_length), 493 base::CaseInsensitiveCompare<typename STR::value_type>()); 494 } 495} 496 497bool EndsWith(const std::string& str, const std::string& search, 498 bool case_sensitive) { 499 return EndsWithT(str, search, case_sensitive); 500} 501 502bool EndsWith(const string16& str, const string16& search, 503 bool case_sensitive) { 504 return EndsWithT(str, search, case_sensitive); 505} 506 507static const char* const kByteStringsUnlocalized[] = { 508 " B", 509 " kB", 510 " MB", 511 " GB", 512 " TB", 513 " PB" 514}; 515 516string16 FormatBytesUnlocalized(int64 bytes) { 517 double unit_amount = static_cast<double>(bytes); 518 size_t dimension = 0; 519 const int kKilo = 1024; 520 while (unit_amount >= kKilo && 521 dimension < arraysize(kByteStringsUnlocalized) - 1) { 522 unit_amount /= kKilo; 523 dimension++; 524 } 525 526 char buf[64]; 527 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 528 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 529 kByteStringsUnlocalized[dimension]); 530 } else { 531 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 532 kByteStringsUnlocalized[dimension]); 533 } 534 535 return ASCIIToUTF16(buf); 536} 537 538template<class StringType> 539void DoReplaceSubstringsAfterOffset(StringType* str, 540 typename StringType::size_type start_offset, 541 const StringType& find_this, 542 const StringType& replace_with, 543 bool replace_all) { 544 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 545 return; 546 547 DCHECK(!find_this.empty()); 548 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 549 offs != StringType::npos; offs = str->find(find_this, offs)) { 550 str->replace(offs, find_this.length(), replace_with); 551 offs += replace_with.length(); 552 553 if (!replace_all) 554 break; 555 } 556} 557 558void ReplaceFirstSubstringAfterOffset(string16* str, 559 string16::size_type start_offset, 560 const string16& find_this, 561 const string16& replace_with) { 562 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 563 false); // replace first instance 564} 565 566void ReplaceFirstSubstringAfterOffset(std::string* str, 567 std::string::size_type start_offset, 568 const std::string& find_this, 569 const std::string& replace_with) { 570 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 571 false); // replace first instance 572} 573 574void ReplaceSubstringsAfterOffset(string16* str, 575 string16::size_type start_offset, 576 const string16& find_this, 577 const string16& replace_with) { 578 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 579 true); // replace all instances 580} 581 582void ReplaceSubstringsAfterOffset(std::string* str, 583 std::string::size_type start_offset, 584 const std::string& find_this, 585 const std::string& replace_with) { 586 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 587 true); // replace all instances 588} 589 590 591template<typename STR> 592static size_t TokenizeT(const STR& str, 593 const STR& delimiters, 594 std::vector<STR>* tokens) { 595 tokens->clear(); 596 597 typename STR::size_type start = str.find_first_not_of(delimiters); 598 while (start != STR::npos) { 599 typename STR::size_type end = str.find_first_of(delimiters, start + 1); 600 if (end == STR::npos) { 601 tokens->push_back(str.substr(start)); 602 break; 603 } else { 604 tokens->push_back(str.substr(start, end - start)); 605 start = str.find_first_not_of(delimiters, end + 1); 606 } 607 } 608 609 return tokens->size(); 610} 611 612size_t Tokenize(const string16& str, 613 const string16& delimiters, 614 std::vector<string16>* tokens) { 615 return TokenizeT(str, delimiters, tokens); 616} 617 618size_t Tokenize(const std::string& str, 619 const std::string& delimiters, 620 std::vector<std::string>* tokens) { 621 return TokenizeT(str, delimiters, tokens); 622} 623 624size_t Tokenize(const base::StringPiece& str, 625 const base::StringPiece& delimiters, 626 std::vector<base::StringPiece>* tokens) { 627 return TokenizeT(str, delimiters, tokens); 628} 629 630template<typename STR> 631static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 632 if (parts.empty()) 633 return STR(); 634 635 STR result(parts[0]); 636 typename std::vector<STR>::const_iterator iter = parts.begin(); 637 ++iter; 638 639 for (; iter != parts.end(); ++iter) { 640 result += sep; 641 result += *iter; 642 } 643 644 return result; 645} 646 647std::string JoinString(const std::vector<std::string>& parts, char sep) { 648 return JoinStringT(parts, std::string(1, sep)); 649} 650 651string16 JoinString(const std::vector<string16>& parts, char16 sep) { 652 return JoinStringT(parts, string16(1, sep)); 653} 654 655std::string JoinString(const std::vector<std::string>& parts, 656 const std::string& separator) { 657 return JoinStringT(parts, separator); 658} 659 660string16 JoinString(const std::vector<string16>& parts, 661 const string16& separator) { 662 return JoinStringT(parts, separator); 663} 664 665template<class FormatStringType, class OutStringType> 666OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 667 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 668 size_t substitutions = subst.size(); 669 670 size_t sub_length = 0; 671 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 672 iter != subst.end(); ++iter) { 673 sub_length += iter->length(); 674 } 675 676 OutStringType formatted; 677 formatted.reserve(format_string.length() + sub_length); 678 679 std::vector<ReplacementOffset> r_offsets; 680 for (typename FormatStringType::const_iterator i = format_string.begin(); 681 i != format_string.end(); ++i) { 682 if ('$' == *i) { 683 if (i + 1 != format_string.end()) { 684 ++i; 685 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 686 if ('$' == *i) { 687 while (i != format_string.end() && '$' == *i) { 688 formatted.push_back('$'); 689 ++i; 690 } 691 --i; 692 } else { 693 uintptr_t index = 0; 694 while (i != format_string.end() && '0' <= *i && *i <= '9') { 695 index *= 10; 696 index += *i - '0'; 697 ++i; 698 } 699 --i; 700 index -= 1; 701 if (offsets) { 702 ReplacementOffset r_offset(index, 703 static_cast<int>(formatted.size())); 704 r_offsets.insert(std::lower_bound(r_offsets.begin(), 705 r_offsets.end(), 706 r_offset, 707 &CompareParameter), 708 r_offset); 709 } 710 if (index < substitutions) 711 formatted.append(subst.at(index)); 712 } 713 } 714 } else { 715 formatted.push_back(*i); 716 } 717 } 718 if (offsets) { 719 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 720 i != r_offsets.end(); ++i) { 721 offsets->push_back(i->offset); 722 } 723 } 724 return formatted; 725} 726 727string16 ReplaceStringPlaceholders(const string16& format_string, 728 const std::vector<string16>& subst, 729 std::vector<size_t>* offsets) { 730 return DoReplaceStringPlaceholders(format_string, subst, offsets); 731} 732 733std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 734 const std::vector<std::string>& subst, 735 std::vector<size_t>* offsets) { 736 return DoReplaceStringPlaceholders(format_string, subst, offsets); 737} 738 739string16 ReplaceStringPlaceholders(const string16& format_string, 740 const string16& a, 741 size_t* offset) { 742 std::vector<size_t> offsets; 743 std::vector<string16> subst; 744 subst.push_back(a); 745 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 746 747 DCHECK_EQ(1U, offsets.size()); 748 if (offset) 749 *offset = offsets[0]; 750 return result; 751} 752 753static bool IsWildcard(base_icu::UChar32 character) { 754 return character == '*' || character == '?'; 755} 756 757// Move the strings pointers to the point where they start to differ. 758template <typename CHAR, typename NEXT> 759static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 760 const CHAR** string, const CHAR* string_end, 761 NEXT next) { 762 const CHAR* escape = NULL; 763 while (*pattern != pattern_end && *string != string_end) { 764 if (!escape && IsWildcard(**pattern)) { 765 // We don't want to match wildcard here, except if it's escaped. 766 return; 767 } 768 769 // Check if the escapement char is found. If so, skip it and move to the 770 // next character. 771 if (!escape && **pattern == '\\') { 772 escape = *pattern; 773 next(pattern, pattern_end); 774 continue; 775 } 776 777 // Check if the chars match, if so, increment the ptrs. 778 const CHAR* pattern_next = *pattern; 779 const CHAR* string_next = *string; 780 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 781 if (pattern_char == next(&string_next, string_end) && 782 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 783 *pattern = pattern_next; 784 *string = string_next; 785 } else { 786 // Uh ho, it did not match, we are done. If the last char was an 787 // escapement, that means that it was an error to advance the ptr here, 788 // let's put it back where it was. This also mean that the MatchPattern 789 // function will return false because if we can't match an escape char 790 // here, then no one will. 791 if (escape) { 792 *pattern = escape; 793 } 794 return; 795 } 796 797 escape = NULL; 798 } 799} 800 801template <typename CHAR, typename NEXT> 802static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 803 while (*pattern != end) { 804 if (!IsWildcard(**pattern)) 805 return; 806 next(pattern, end); 807 } 808} 809 810template <typename CHAR, typename NEXT> 811static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 812 const CHAR* pattern, const CHAR* pattern_end, 813 int depth, 814 NEXT next) { 815 const int kMaxDepth = 16; 816 if (depth > kMaxDepth) 817 return false; 818 819 // Eat all the matching chars. 820 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 821 822 // If the string is empty, then the pattern must be empty too, or contains 823 // only wildcards. 824 if (eval == eval_end) { 825 EatWildcard(&pattern, pattern_end, next); 826 return pattern == pattern_end; 827 } 828 829 // Pattern is empty but not string, this is not a match. 830 if (pattern == pattern_end) 831 return false; 832 833 // If this is a question mark, then we need to compare the rest with 834 // the current string or the string with one character eaten. 835 const CHAR* next_pattern = pattern; 836 next(&next_pattern, pattern_end); 837 if (pattern[0] == '?') { 838 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 839 depth + 1, next)) 840 return true; 841 const CHAR* next_eval = eval; 842 next(&next_eval, eval_end); 843 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 844 depth + 1, next)) 845 return true; 846 } 847 848 // This is a *, try to match all the possible substrings with the remainder 849 // of the pattern. 850 if (pattern[0] == '*') { 851 // Collapse duplicate wild cards (********** into *) so that the 852 // method does not recurse unnecessarily. http://crbug.com/52839 853 EatWildcard(&next_pattern, pattern_end, next); 854 855 while (eval != eval_end) { 856 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 857 depth + 1, next)) 858 return true; 859 eval++; 860 } 861 862 // We reached the end of the string, let see if the pattern contains only 863 // wildcards. 864 if (eval == eval_end) { 865 EatWildcard(&pattern, pattern_end, next); 866 if (pattern != pattern_end) 867 return false; 868 return true; 869 } 870 } 871 872 return false; 873} 874 875struct NextCharUTF8 { 876 base_icu::UChar32 operator()(const char** p, const char* end) { 877 base_icu::UChar32 c; 878 int offset = 0; 879 CBU8_NEXT(*p, offset, end - *p, c); 880 *p += offset; 881 return c; 882 } 883}; 884 885struct NextCharUTF16 { 886 base_icu::UChar32 operator()(const char16** p, const char16* end) { 887 base_icu::UChar32 c; 888 int offset = 0; 889 CBU16_NEXT(*p, offset, end - *p, c); 890 *p += offset; 891 return c; 892 } 893}; 894 895bool MatchPattern(const base::StringPiece& eval, 896 const base::StringPiece& pattern) { 897 return MatchPatternT(eval.data(), eval.data() + eval.size(), 898 pattern.data(), pattern.data() + pattern.size(), 899 0, NextCharUTF8()); 900} 901 902bool MatchPattern(const string16& eval, const string16& pattern) { 903 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 904 pattern.c_str(), pattern.c_str() + pattern.size(), 905 0, NextCharUTF16()); 906} 907 908// The following code is compatible with the OpenBSD lcpy interface. See: 909// http://www.gratisoft.us/todd/papers/strlcpy.html 910// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 911 912namespace { 913 914template <typename CHAR> 915size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 916 for (size_t i = 0; i < dst_size; ++i) { 917 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 918 return i; 919 } 920 921 // We were left off at dst_size. We over copied 1 byte. Null terminate. 922 if (dst_size != 0) 923 dst[dst_size - 1] = 0; 924 925 // Count the rest of the |src|, and return it's length in characters. 926 while (src[dst_size]) ++dst_size; 927 return dst_size; 928} 929 930} // namespace 931 932size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 933 return lcpyT<char>(dst, src, dst_size); 934} 935size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 936 return lcpyT<wchar_t>(dst, src, dst_size); 937} 938