string_util.cc revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <ctype.h> 8#include <errno.h> 9#include <math.h> 10#include <stdarg.h> 11#include <stdio.h> 12#include <stdlib.h> 13#include <string.h> 14#include <time.h> 15#include <wchar.h> 16#include <wctype.h> 17 18#include <algorithm> 19#include <vector> 20 21#include "base/basictypes.h" 22#include "base/logging.h" 23#include "base/memory/singleton.h" 24#include "base/strings/utf_string_conversion_utils.h" 25#include "base/strings/utf_string_conversions.h" 26#include "base/third_party/icu/icu_utf.h" 27#include "build/build_config.h" 28 29// Remove when this entire file is in the base namespace. 30using base::char16; 31using base::string16; 32 33namespace { 34 35// Force the singleton used by EmptyString[16] to be a unique type. This 36// prevents other code that might accidentally use Singleton<string> from 37// getting our internal one. 38struct EmptyStrings { 39 EmptyStrings() {} 40 const std::string s; 41 const string16 s16; 42 43 static EmptyStrings* GetInstance() { 44 return Singleton<EmptyStrings>::get(); 45 } 46}; 47 48// Used by ReplaceStringPlaceholders to track the position in the string of 49// replaced parameters. 50struct ReplacementOffset { 51 ReplacementOffset(uintptr_t parameter, size_t offset) 52 : parameter(parameter), 53 offset(offset) {} 54 55 // Index of the parameter. 56 uintptr_t parameter; 57 58 // Starting position in the string. 59 size_t offset; 60}; 61 62static bool CompareParameter(const ReplacementOffset& elem1, 63 const ReplacementOffset& elem2) { 64 return elem1.parameter < elem2.parameter; 65} 66 67} // namespace 68 69namespace base { 70 71bool IsWprintfFormatPortable(const wchar_t* format) { 72 for (const wchar_t* position = format; *position != '\0'; ++position) { 73 if (*position == '%') { 74 bool in_specification = true; 75 bool modifier_l = false; 76 while (in_specification) { 77 // Eat up characters until reaching a known specifier. 78 if (*++position == '\0') { 79 // The format string ended in the middle of a specification. Call 80 // it portable because no unportable specifications were found. The 81 // string is equally broken on all platforms. 82 return true; 83 } 84 85 if (*position == 'l') { 86 // 'l' is the only thing that can save the 's' and 'c' specifiers. 87 modifier_l = true; 88 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 89 *position == 'S' || *position == 'C' || *position == 'F' || 90 *position == 'D' || *position == 'O' || *position == 'U') { 91 // Not portable. 92 return false; 93 } 94 95 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 96 // Portable, keep scanning the rest of the format string. 97 in_specification = false; 98 } 99 } 100 } 101 } 102 103 return true; 104} 105 106const std::string& EmptyString() { 107 return EmptyStrings::GetInstance()->s; 108} 109 110const string16& EmptyString16() { 111 return EmptyStrings::GetInstance()->s16; 112} 113 114template<typename STR> 115bool ReplaceCharsT(const STR& input, 116 const STR& replace_chars, 117 const STR& replace_with, 118 STR* output) { 119 bool removed = false; 120 size_t replace_length = replace_with.length(); 121 122 *output = input; 123 124 size_t found = output->find_first_of(replace_chars); 125 while (found != STR::npos) { 126 removed = true; 127 output->replace(found, 1, replace_with); 128 found = output->find_first_of(replace_chars, found + replace_length); 129 } 130 131 return removed; 132} 133 134bool ReplaceChars(const string16& input, 135 const base::StringPiece16& replace_chars, 136 const string16& replace_with, 137 string16* output) { 138 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 139} 140 141bool ReplaceChars(const std::string& input, 142 const base::StringPiece& replace_chars, 143 const std::string& replace_with, 144 std::string* output) { 145 return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output); 146} 147 148bool RemoveChars(const string16& input, 149 const base::StringPiece16& remove_chars, 150 string16* output) { 151 return ReplaceChars(input, remove_chars.as_string(), string16(), output); 152} 153 154bool RemoveChars(const std::string& input, 155 const base::StringPiece& remove_chars, 156 std::string* output) { 157 return ReplaceChars(input, remove_chars.as_string(), std::string(), output); 158} 159 160template<typename STR> 161TrimPositions TrimStringT(const STR& input, 162 const STR& trim_chars, 163 TrimPositions positions, 164 STR* output) { 165 // Find the edges of leading/trailing whitespace as desired. 166 const size_t last_char = input.length() - 1; 167 const size_t first_good_char = (positions & TRIM_LEADING) ? 168 input.find_first_not_of(trim_chars) : 0; 169 const size_t last_good_char = (positions & TRIM_TRAILING) ? 170 input.find_last_not_of(trim_chars) : last_char; 171 172 // When the string was all whitespace, report that we stripped off whitespace 173 // from whichever position the caller was interested in. For empty input, we 174 // stripped no whitespace, but we still need to clear |output|. 175 if (input.empty() || 176 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 177 bool input_was_empty = input.empty(); // in case output == &input 178 output->clear(); 179 return input_was_empty ? TRIM_NONE : positions; 180 } 181 182 // Trim the whitespace. 183 *output = 184 input.substr(first_good_char, last_good_char - first_good_char + 1); 185 186 // Return where we trimmed from. 187 return static_cast<TrimPositions>( 188 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 189 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 190} 191 192bool TrimString(const string16& input, 193 const base::StringPiece16& trim_chars, 194 string16* output) { 195 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) != 196 TRIM_NONE; 197} 198 199bool TrimString(const std::string& input, 200 const base::StringPiece& trim_chars, 201 std::string* output) { 202 return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) != 203 TRIM_NONE; 204} 205 206void TruncateUTF8ToByteSize(const std::string& input, 207 const size_t byte_size, 208 std::string* output) { 209 DCHECK(output); 210 if (byte_size > input.length()) { 211 *output = input; 212 return; 213 } 214 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 215 // Note: This cast is necessary because CBU8_NEXT uses int32s. 216 int32 truncation_length = static_cast<int32>(byte_size); 217 int32 char_index = truncation_length - 1; 218 const char* data = input.data(); 219 220 // Using CBU8, we will move backwards from the truncation point 221 // to the beginning of the string looking for a valid UTF8 222 // character. Once a full UTF8 character is found, we will 223 // truncate the string to the end of that character. 224 while (char_index >= 0) { 225 int32 prev = char_index; 226 uint32 code_point = 0; 227 CBU8_NEXT(data, char_index, truncation_length, code_point); 228 if (!IsValidCharacter(code_point) || 229 !IsValidCodepoint(code_point)) { 230 char_index = prev - 1; 231 } else { 232 break; 233 } 234 } 235 236 if (char_index >= 0 ) 237 *output = input.substr(0, char_index); 238 else 239 output->clear(); 240} 241 242TrimPositions TrimWhitespace(const string16& input, 243 TrimPositions positions, 244 string16* output) { 245 return TrimStringT(input, base::string16(kWhitespaceUTF16), positions, 246 output); 247} 248 249TrimPositions TrimWhitespaceASCII(const std::string& input, 250 TrimPositions positions, 251 std::string* output) { 252 return TrimStringT(input, std::string(kWhitespaceASCII), positions, output); 253} 254 255// This function is only for backward-compatibility. 256// To be removed when all callers are updated. 257TrimPositions TrimWhitespace(const std::string& input, 258 TrimPositions positions, 259 std::string* output) { 260 return TrimWhitespaceASCII(input, positions, output); 261} 262 263template<typename STR> 264STR CollapseWhitespaceT(const STR& text, 265 bool trim_sequences_with_line_breaks) { 266 STR result; 267 result.resize(text.size()); 268 269 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 270 // will trim any leading whitespace. 271 bool in_whitespace = true; 272 bool already_trimmed = true; 273 274 int chars_written = 0; 275 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 276 if (IsWhitespace(*i)) { 277 if (!in_whitespace) { 278 // Reduce all whitespace sequences to a single space. 279 in_whitespace = true; 280 result[chars_written++] = L' '; 281 } 282 if (trim_sequences_with_line_breaks && !already_trimmed && 283 ((*i == '\n') || (*i == '\r'))) { 284 // Whitespace sequences containing CR or LF are eliminated entirely. 285 already_trimmed = true; 286 --chars_written; 287 } 288 } else { 289 // Non-whitespace chracters are copied straight across. 290 in_whitespace = false; 291 already_trimmed = false; 292 result[chars_written++] = *i; 293 } 294 } 295 296 if (in_whitespace && !already_trimmed) { 297 // Any trailing whitespace is eliminated. 298 --chars_written; 299 } 300 301 result.resize(chars_written); 302 return result; 303} 304 305string16 CollapseWhitespace(const string16& text, 306 bool trim_sequences_with_line_breaks) { 307 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 308} 309 310std::string CollapseWhitespaceASCII(const std::string& text, 311 bool trim_sequences_with_line_breaks) { 312 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 313} 314 315bool ContainsOnlyChars(const StringPiece& input, 316 const StringPiece& characters) { 317 return input.find_first_not_of(characters) == StringPiece::npos; 318} 319 320bool ContainsOnlyChars(const StringPiece16& input, 321 const StringPiece16& characters) { 322 return input.find_first_not_of(characters) == StringPiece16::npos; 323} 324 325template<class STR> 326static bool DoIsStringASCII(const STR& str) { 327 for (size_t i = 0; i < str.length(); i++) { 328 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 329 if (c > 0x7F) 330 return false; 331 } 332 return true; 333} 334 335bool IsStringASCII(const StringPiece& str) { 336 return DoIsStringASCII(str); 337} 338 339bool IsStringASCII(const string16& str) { 340 return DoIsStringASCII(str); 341} 342 343bool IsStringUTF8(const std::string& str) { 344 const char *src = str.data(); 345 int32 src_len = static_cast<int32>(str.length()); 346 int32 char_index = 0; 347 348 while (char_index < src_len) { 349 int32 code_point; 350 CBU8_NEXT(src, char_index, src_len, code_point); 351 if (!IsValidCharacter(code_point)) 352 return false; 353 } 354 return true; 355} 356 357} // namespace base 358 359template<typename Iter> 360static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 361 Iter a_end, 362 const char* b) { 363 for (Iter it = a_begin; it != a_end; ++it, ++b) { 364 if (!*b || base::ToLowerASCII(*it) != *b) 365 return false; 366 } 367 return *b == 0; 368} 369 370// Front-ends for LowerCaseEqualsASCII. 371bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 372 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 373} 374 375bool LowerCaseEqualsASCII(const string16& a, const char* b) { 376 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 377} 378 379bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 380 std::string::const_iterator a_end, 381 const char* b) { 382 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 383} 384 385bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 386 string16::const_iterator a_end, 387 const char* b) { 388 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 389} 390 391// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 392#if !defined(OS_ANDROID) 393bool LowerCaseEqualsASCII(const char* a_begin, 394 const char* a_end, 395 const char* b) { 396 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 397} 398 399bool LowerCaseEqualsASCII(const char16* a_begin, 400 const char16* a_end, 401 const char* b) { 402 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 403} 404 405#endif // !defined(OS_ANDROID) 406 407bool EqualsASCII(const string16& a, const base::StringPiece& b) { 408 if (a.length() != b.length()) 409 return false; 410 return std::equal(b.begin(), b.end(), a.begin()); 411} 412 413bool StartsWithASCII(const std::string& str, 414 const std::string& search, 415 bool case_sensitive) { 416 if (case_sensitive) 417 return str.compare(0, search.length(), search) == 0; 418 else 419 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 420} 421 422template <typename STR> 423bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 424 if (case_sensitive) { 425 return str.compare(0, search.length(), search) == 0; 426 } else { 427 if (search.size() > str.size()) 428 return false; 429 return std::equal(search.begin(), search.end(), str.begin(), 430 base::CaseInsensitiveCompare<typename STR::value_type>()); 431 } 432} 433 434bool StartsWith(const string16& str, const string16& search, 435 bool case_sensitive) { 436 return StartsWithT(str, search, case_sensitive); 437} 438 439template <typename STR> 440bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 441 size_t str_length = str.length(); 442 size_t search_length = search.length(); 443 if (search_length > str_length) 444 return false; 445 if (case_sensitive) 446 return str.compare(str_length - search_length, search_length, search) == 0; 447 return std::equal(search.begin(), search.end(), 448 str.begin() + (str_length - search_length), 449 base::CaseInsensitiveCompare<typename STR::value_type>()); 450} 451 452bool EndsWith(const std::string& str, const std::string& search, 453 bool case_sensitive) { 454 return EndsWithT(str, search, case_sensitive); 455} 456 457bool EndsWith(const string16& str, const string16& search, 458 bool case_sensitive) { 459 return EndsWithT(str, search, case_sensitive); 460} 461 462static const char* const kByteStringsUnlocalized[] = { 463 " B", 464 " kB", 465 " MB", 466 " GB", 467 " TB", 468 " PB" 469}; 470 471string16 FormatBytesUnlocalized(int64 bytes) { 472 double unit_amount = static_cast<double>(bytes); 473 size_t dimension = 0; 474 const int kKilo = 1024; 475 while (unit_amount >= kKilo && 476 dimension < arraysize(kByteStringsUnlocalized) - 1) { 477 unit_amount /= kKilo; 478 dimension++; 479 } 480 481 char buf[64]; 482 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 483 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 484 kByteStringsUnlocalized[dimension]); 485 } else { 486 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 487 kByteStringsUnlocalized[dimension]); 488 } 489 490 return base::ASCIIToUTF16(buf); 491} 492 493template<class StringType> 494void DoReplaceSubstringsAfterOffset(StringType* str, 495 size_t start_offset, 496 const StringType& find_this, 497 const StringType& replace_with, 498 bool replace_all) { 499 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 500 return; 501 502 DCHECK(!find_this.empty()); 503 for (size_t offs(str->find(find_this, start_offset)); 504 offs != StringType::npos; offs = str->find(find_this, offs)) { 505 str->replace(offs, find_this.length(), replace_with); 506 offs += replace_with.length(); 507 508 if (!replace_all) 509 break; 510 } 511} 512 513void ReplaceFirstSubstringAfterOffset(string16* str, 514 size_t start_offset, 515 const string16& find_this, 516 const string16& replace_with) { 517 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 518 false); // replace first instance 519} 520 521void ReplaceFirstSubstringAfterOffset(std::string* str, 522 size_t start_offset, 523 const std::string& find_this, 524 const std::string& replace_with) { 525 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 526 false); // replace first instance 527} 528 529void ReplaceSubstringsAfterOffset(string16* str, 530 size_t start_offset, 531 const string16& find_this, 532 const string16& replace_with) { 533 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 534 true); // replace all instances 535} 536 537void ReplaceSubstringsAfterOffset(std::string* str, 538 size_t start_offset, 539 const std::string& find_this, 540 const std::string& replace_with) { 541 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 542 true); // replace all instances 543} 544 545 546template<typename STR> 547static size_t TokenizeT(const STR& str, 548 const STR& delimiters, 549 std::vector<STR>* tokens) { 550 tokens->clear(); 551 552 size_t start = str.find_first_not_of(delimiters); 553 while (start != STR::npos) { 554 size_t end = str.find_first_of(delimiters, start + 1); 555 if (end == STR::npos) { 556 tokens->push_back(str.substr(start)); 557 break; 558 } else { 559 tokens->push_back(str.substr(start, end - start)); 560 start = str.find_first_not_of(delimiters, end + 1); 561 } 562 } 563 564 return tokens->size(); 565} 566 567size_t Tokenize(const string16& str, 568 const string16& delimiters, 569 std::vector<string16>* tokens) { 570 return TokenizeT(str, delimiters, tokens); 571} 572 573size_t Tokenize(const std::string& str, 574 const std::string& delimiters, 575 std::vector<std::string>* tokens) { 576 return TokenizeT(str, delimiters, tokens); 577} 578 579size_t Tokenize(const base::StringPiece& str, 580 const base::StringPiece& delimiters, 581 std::vector<base::StringPiece>* tokens) { 582 return TokenizeT(str, delimiters, tokens); 583} 584 585template<typename STR> 586static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 587 if (parts.empty()) 588 return STR(); 589 590 STR result(parts[0]); 591 typename std::vector<STR>::const_iterator iter = parts.begin(); 592 ++iter; 593 594 for (; iter != parts.end(); ++iter) { 595 result += sep; 596 result += *iter; 597 } 598 599 return result; 600} 601 602std::string JoinString(const std::vector<std::string>& parts, char sep) { 603 return JoinStringT(parts, std::string(1, sep)); 604} 605 606string16 JoinString(const std::vector<string16>& parts, char16 sep) { 607 return JoinStringT(parts, string16(1, sep)); 608} 609 610std::string JoinString(const std::vector<std::string>& parts, 611 const std::string& separator) { 612 return JoinStringT(parts, separator); 613} 614 615string16 JoinString(const std::vector<string16>& parts, 616 const string16& separator) { 617 return JoinStringT(parts, separator); 618} 619 620template<class FormatStringType, class OutStringType> 621OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 622 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 623 size_t substitutions = subst.size(); 624 625 size_t sub_length = 0; 626 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 627 iter != subst.end(); ++iter) { 628 sub_length += iter->length(); 629 } 630 631 OutStringType formatted; 632 formatted.reserve(format_string.length() + sub_length); 633 634 std::vector<ReplacementOffset> r_offsets; 635 for (typename FormatStringType::const_iterator i = format_string.begin(); 636 i != format_string.end(); ++i) { 637 if ('$' == *i) { 638 if (i + 1 != format_string.end()) { 639 ++i; 640 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 641 if ('$' == *i) { 642 while (i != format_string.end() && '$' == *i) { 643 formatted.push_back('$'); 644 ++i; 645 } 646 --i; 647 } else { 648 uintptr_t index = 0; 649 while (i != format_string.end() && '0' <= *i && *i <= '9') { 650 index *= 10; 651 index += *i - '0'; 652 ++i; 653 } 654 --i; 655 index -= 1; 656 if (offsets) { 657 ReplacementOffset r_offset(index, 658 static_cast<int>(formatted.size())); 659 r_offsets.insert(std::lower_bound(r_offsets.begin(), 660 r_offsets.end(), 661 r_offset, 662 &CompareParameter), 663 r_offset); 664 } 665 if (index < substitutions) 666 formatted.append(subst.at(index)); 667 } 668 } 669 } else { 670 formatted.push_back(*i); 671 } 672 } 673 if (offsets) { 674 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 675 i != r_offsets.end(); ++i) { 676 offsets->push_back(i->offset); 677 } 678 } 679 return formatted; 680} 681 682string16 ReplaceStringPlaceholders(const string16& format_string, 683 const std::vector<string16>& subst, 684 std::vector<size_t>* offsets) { 685 return DoReplaceStringPlaceholders(format_string, subst, offsets); 686} 687 688std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 689 const std::vector<std::string>& subst, 690 std::vector<size_t>* offsets) { 691 return DoReplaceStringPlaceholders(format_string, subst, offsets); 692} 693 694string16 ReplaceStringPlaceholders(const string16& format_string, 695 const string16& a, 696 size_t* offset) { 697 std::vector<size_t> offsets; 698 std::vector<string16> subst; 699 subst.push_back(a); 700 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 701 702 DCHECK_EQ(1U, offsets.size()); 703 if (offset) 704 *offset = offsets[0]; 705 return result; 706} 707 708static bool IsWildcard(base_icu::UChar32 character) { 709 return character == '*' || character == '?'; 710} 711 712// Move the strings pointers to the point where they start to differ. 713template <typename CHAR, typename NEXT> 714static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 715 const CHAR** string, const CHAR* string_end, 716 NEXT next) { 717 const CHAR* escape = NULL; 718 while (*pattern != pattern_end && *string != string_end) { 719 if (!escape && IsWildcard(**pattern)) { 720 // We don't want to match wildcard here, except if it's escaped. 721 return; 722 } 723 724 // Check if the escapement char is found. If so, skip it and move to the 725 // next character. 726 if (!escape && **pattern == '\\') { 727 escape = *pattern; 728 next(pattern, pattern_end); 729 continue; 730 } 731 732 // Check if the chars match, if so, increment the ptrs. 733 const CHAR* pattern_next = *pattern; 734 const CHAR* string_next = *string; 735 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 736 if (pattern_char == next(&string_next, string_end) && 737 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 738 *pattern = pattern_next; 739 *string = string_next; 740 } else { 741 // Uh ho, it did not match, we are done. If the last char was an 742 // escapement, that means that it was an error to advance the ptr here, 743 // let's put it back where it was. This also mean that the MatchPattern 744 // function will return false because if we can't match an escape char 745 // here, then no one will. 746 if (escape) { 747 *pattern = escape; 748 } 749 return; 750 } 751 752 escape = NULL; 753 } 754} 755 756template <typename CHAR, typename NEXT> 757static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 758 while (*pattern != end) { 759 if (!IsWildcard(**pattern)) 760 return; 761 next(pattern, end); 762 } 763} 764 765template <typename CHAR, typename NEXT> 766static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 767 const CHAR* pattern, const CHAR* pattern_end, 768 int depth, 769 NEXT next) { 770 const int kMaxDepth = 16; 771 if (depth > kMaxDepth) 772 return false; 773 774 // Eat all the matching chars. 775 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 776 777 // If the string is empty, then the pattern must be empty too, or contains 778 // only wildcards. 779 if (eval == eval_end) { 780 EatWildcard(&pattern, pattern_end, next); 781 return pattern == pattern_end; 782 } 783 784 // Pattern is empty but not string, this is not a match. 785 if (pattern == pattern_end) 786 return false; 787 788 // If this is a question mark, then we need to compare the rest with 789 // the current string or the string with one character eaten. 790 const CHAR* next_pattern = pattern; 791 next(&next_pattern, pattern_end); 792 if (pattern[0] == '?') { 793 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 794 depth + 1, next)) 795 return true; 796 const CHAR* next_eval = eval; 797 next(&next_eval, eval_end); 798 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 799 depth + 1, next)) 800 return true; 801 } 802 803 // This is a *, try to match all the possible substrings with the remainder 804 // of the pattern. 805 if (pattern[0] == '*') { 806 // Collapse duplicate wild cards (********** into *) so that the 807 // method does not recurse unnecessarily. http://crbug.com/52839 808 EatWildcard(&next_pattern, pattern_end, next); 809 810 while (eval != eval_end) { 811 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 812 depth + 1, next)) 813 return true; 814 eval++; 815 } 816 817 // We reached the end of the string, let see if the pattern contains only 818 // wildcards. 819 if (eval == eval_end) { 820 EatWildcard(&pattern, pattern_end, next); 821 if (pattern != pattern_end) 822 return false; 823 return true; 824 } 825 } 826 827 return false; 828} 829 830struct NextCharUTF8 { 831 base_icu::UChar32 operator()(const char** p, const char* end) { 832 base_icu::UChar32 c; 833 int offset = 0; 834 CBU8_NEXT(*p, offset, end - *p, c); 835 *p += offset; 836 return c; 837 } 838}; 839 840struct NextCharUTF16 { 841 base_icu::UChar32 operator()(const char16** p, const char16* end) { 842 base_icu::UChar32 c; 843 int offset = 0; 844 CBU16_NEXT(*p, offset, end - *p, c); 845 *p += offset; 846 return c; 847 } 848}; 849 850bool MatchPattern(const base::StringPiece& eval, 851 const base::StringPiece& pattern) { 852 return MatchPatternT(eval.data(), eval.data() + eval.size(), 853 pattern.data(), pattern.data() + pattern.size(), 854 0, NextCharUTF8()); 855} 856 857bool MatchPattern(const string16& eval, const string16& pattern) { 858 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 859 pattern.c_str(), pattern.c_str() + pattern.size(), 860 0, NextCharUTF16()); 861} 862 863// The following code is compatible with the OpenBSD lcpy interface. See: 864// http://www.gratisoft.us/todd/papers/strlcpy.html 865// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 866 867namespace { 868 869template <typename CHAR> 870size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 871 for (size_t i = 0; i < dst_size; ++i) { 872 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 873 return i; 874 } 875 876 // We were left off at dst_size. We over copied 1 byte. Null terminate. 877 if (dst_size != 0) 878 dst[dst_size - 1] = 0; 879 880 // Count the rest of the |src|, and return it's length in characters. 881 while (src[dst_size]) ++dst_size; 882 return dst_size; 883} 884 885} // namespace 886 887size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 888 return lcpyT<char>(dst, src, dst_size); 889} 890size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 891 return lcpyT<wchar_t>(dst, src, dst_size); 892} 893