// string_util.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <ctype.h> 8#include <errno.h> 9#include <math.h> 10#include <stdarg.h> 11#include <stdio.h> 12#include <stdlib.h> 13#include <string.h> 14#include <time.h> 15#include <wchar.h> 16#include <wctype.h> 17 18#include <algorithm> 19#include <vector> 20 21#include "base/basictypes.h" 22#include "base/logging.h" 23#include "base/memory/singleton.h" 24#include "base/strings/utf_string_conversion_utils.h" 25#include "base/strings/utf_string_conversions.h" 26#include "base/third_party/icu/icu_utf.h" 27#include "build/build_config.h" 28 29// Remove when this entire file is in the base namespace. 30using base::char16; 31using base::string16; 32 33namespace { 34 35// Force the singleton used by Empty[W]String[16] to be a unique type. This 36// prevents other code that might accidentally use Singleton<string> from 37// getting our internal one. 38struct EmptyStrings { 39 EmptyStrings() {} 40 const std::string s; 41 const std::wstring ws; 42 const string16 s16; 43 44 static EmptyStrings* GetInstance() { 45 return Singleton<EmptyStrings>::get(); 46 } 47}; 48 49// Used by ReplaceStringPlaceholders to track the position in the string of 50// replaced parameters. 51struct ReplacementOffset { 52 ReplacementOffset(uintptr_t parameter, size_t offset) 53 : parameter(parameter), 54 offset(offset) {} 55 56 // Index of the parameter. 57 uintptr_t parameter; 58 59 // Starting position in the string. 
60 size_t offset; 61}; 62 63static bool CompareParameter(const ReplacementOffset& elem1, 64 const ReplacementOffset& elem2) { 65 return elem1.parameter < elem2.parameter; 66} 67 68} // namespace 69 70namespace base { 71 72bool IsWprintfFormatPortable(const wchar_t* format) { 73 for (const wchar_t* position = format; *position != '\0'; ++position) { 74 if (*position == '%') { 75 bool in_specification = true; 76 bool modifier_l = false; 77 while (in_specification) { 78 // Eat up characters until reaching a known specifier. 79 if (*++position == '\0') { 80 // The format string ended in the middle of a specification. Call 81 // it portable because no unportable specifications were found. The 82 // string is equally broken on all platforms. 83 return true; 84 } 85 86 if (*position == 'l') { 87 // 'l' is the only thing that can save the 's' and 'c' specifiers. 88 modifier_l = true; 89 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 90 *position == 'S' || *position == 'C' || *position == 'F' || 91 *position == 'D' || *position == 'O' || *position == 'U') { 92 // Not portable. 93 return false; 94 } 95 96 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 97 // Portable, keep scanning the rest of the format string. 
98 in_specification = false; 99 } 100 } 101 } 102 } 103 104 return true; 105} 106 107const std::string& EmptyString() { 108 return EmptyStrings::GetInstance()->s; 109} 110 111const std::wstring& EmptyWString() { 112 return EmptyStrings::GetInstance()->ws; 113} 114 115const string16& EmptyString16() { 116 return EmptyStrings::GetInstance()->s16; 117} 118 119template<typename STR> 120bool ReplaceCharsT(const STR& input, 121 const typename STR::value_type replace_chars[], 122 const STR& replace_with, 123 STR* output) { 124 bool removed = false; 125 size_t replace_length = replace_with.length(); 126 127 *output = input; 128 129 size_t found = output->find_first_of(replace_chars); 130 while (found != STR::npos) { 131 removed = true; 132 output->replace(found, 1, replace_with); 133 found = output->find_first_of(replace_chars, found + replace_length); 134 } 135 136 return removed; 137} 138 139bool ReplaceChars(const string16& input, 140 const char16 replace_chars[], 141 const string16& replace_with, 142 string16* output) { 143 return ReplaceCharsT(input, replace_chars, replace_with, output); 144} 145 146bool ReplaceChars(const std::string& input, 147 const char replace_chars[], 148 const std::string& replace_with, 149 std::string* output) { 150 return ReplaceCharsT(input, replace_chars, replace_with, output); 151} 152 153bool RemoveChars(const string16& input, 154 const char16 remove_chars[], 155 string16* output) { 156 return ReplaceChars(input, remove_chars, string16(), output); 157} 158 159bool RemoveChars(const std::string& input, 160 const char remove_chars[], 161 std::string* output) { 162 return ReplaceChars(input, remove_chars, std::string(), output); 163} 164 165template<typename STR> 166TrimPositions TrimStringT(const STR& input, 167 const typename STR::value_type trim_chars[], 168 TrimPositions positions, 169 STR* output) { 170 // Find the edges of leading/trailing whitespace as desired. 
171 const typename STR::size_type last_char = input.length() - 1; 172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 173 input.find_first_not_of(trim_chars) : 0; 174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 175 input.find_last_not_of(trim_chars) : last_char; 176 177 // When the string was all whitespace, report that we stripped off whitespace 178 // from whichever position the caller was interested in. For empty input, we 179 // stripped no whitespace, but we still need to clear |output|. 180 if (input.empty() || 181 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 182 bool input_was_empty = input.empty(); // in case output == &input 183 output->clear(); 184 return input_was_empty ? TRIM_NONE : positions; 185 } 186 187 // Trim the whitespace. 188 *output = 189 input.substr(first_good_char, last_good_char - first_good_char + 1); 190 191 // Return where we trimmed from. 192 return static_cast<TrimPositions>( 193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 195} 196 197bool TrimString(const string16& input, 198 const char16 trim_chars[], 199 string16* output) { 200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 201} 202 203bool TrimString(const std::string& input, 204 const char trim_chars[], 205 std::string* output) { 206 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 207} 208 209void TruncateUTF8ToByteSize(const std::string& input, 210 const size_t byte_size, 211 std::string* output) { 212 DCHECK(output); 213 if (byte_size > input.length()) { 214 *output = input; 215 return; 216 } 217 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 218 // Note: This cast is necessary because CBU8_NEXT uses int32s. 
219 int32 truncation_length = static_cast<int32>(byte_size); 220 int32 char_index = truncation_length - 1; 221 const char* data = input.data(); 222 223 // Using CBU8, we will move backwards from the truncation point 224 // to the beginning of the string looking for a valid UTF8 225 // character. Once a full UTF8 character is found, we will 226 // truncate the string to the end of that character. 227 while (char_index >= 0) { 228 int32 prev = char_index; 229 uint32 code_point = 0; 230 CBU8_NEXT(data, char_index, truncation_length, code_point); 231 if (!IsValidCharacter(code_point) || 232 !IsValidCodepoint(code_point)) { 233 char_index = prev - 1; 234 } else { 235 break; 236 } 237 } 238 239 if (char_index >= 0 ) 240 *output = input.substr(0, char_index); 241 else 242 output->clear(); 243} 244 245TrimPositions TrimWhitespace(const string16& input, 246 TrimPositions positions, 247 string16* output) { 248 return TrimStringT(input, kWhitespaceUTF16, positions, output); 249} 250 251TrimPositions TrimWhitespaceASCII(const std::string& input, 252 TrimPositions positions, 253 std::string* output) { 254 return TrimStringT(input, kWhitespaceASCII, positions, output); 255} 256 257// This function is only for backward-compatibility. 258// To be removed when all callers are updated. 259TrimPositions TrimWhitespace(const std::string& input, 260 TrimPositions positions, 261 std::string* output) { 262 return TrimWhitespaceASCII(input, positions, output); 263} 264 265template<typename STR> 266STR CollapseWhitespaceT(const STR& text, 267 bool trim_sequences_with_line_breaks) { 268 STR result; 269 result.resize(text.size()); 270 271 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 272 // will trim any leading whitespace. 
273 bool in_whitespace = true; 274 bool already_trimmed = true; 275 276 int chars_written = 0; 277 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 278 if (IsWhitespace(*i)) { 279 if (!in_whitespace) { 280 // Reduce all whitespace sequences to a single space. 281 in_whitespace = true; 282 result[chars_written++] = L' '; 283 } 284 if (trim_sequences_with_line_breaks && !already_trimmed && 285 ((*i == '\n') || (*i == '\r'))) { 286 // Whitespace sequences containing CR or LF are eliminated entirely. 287 already_trimmed = true; 288 --chars_written; 289 } 290 } else { 291 // Non-whitespace chracters are copied straight across. 292 in_whitespace = false; 293 already_trimmed = false; 294 result[chars_written++] = *i; 295 } 296 } 297 298 if (in_whitespace && !already_trimmed) { 299 // Any trailing whitespace is eliminated. 300 --chars_written; 301 } 302 303 result.resize(chars_written); 304 return result; 305} 306 307string16 CollapseWhitespace(const string16& text, 308 bool trim_sequences_with_line_breaks) { 309 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 310} 311 312std::string CollapseWhitespaceASCII(const std::string& text, 313 bool trim_sequences_with_line_breaks) { 314 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 315} 316 317bool ContainsOnlyChars(const StringPiece& input, 318 const StringPiece& characters) { 319 return input.find_first_not_of(characters) == StringPiece::npos; 320} 321 322bool ContainsOnlyChars(const StringPiece16& input, 323 const StringPiece16& characters) { 324 return input.find_first_not_of(characters) == StringPiece16::npos; 325} 326 327} // namespace base 328 329template<class STR> 330static bool DoIsStringASCII(const STR& str) { 331 for (size_t i = 0; i < str.length(); i++) { 332 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 333 if (c > 0x7F) 334 return false; 335 } 336 return true; 337} 338 339bool IsStringASCII(const base::StringPiece& str) { 340 
return DoIsStringASCII(str); 341} 342 343bool IsStringASCII(const base::string16& str) { 344 return DoIsStringASCII(str); 345} 346 347bool IsStringUTF8(const std::string& str) { 348 const char *src = str.data(); 349 int32 src_len = static_cast<int32>(str.length()); 350 int32 char_index = 0; 351 352 while (char_index < src_len) { 353 int32 code_point; 354 CBU8_NEXT(src, char_index, src_len, code_point); 355 if (!base::IsValidCharacter(code_point)) 356 return false; 357 } 358 return true; 359} 360 361template<typename Iter> 362static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 363 Iter a_end, 364 const char* b) { 365 for (Iter it = a_begin; it != a_end; ++it, ++b) { 366 if (!*b || base::ToLowerASCII(*it) != *b) 367 return false; 368 } 369 return *b == 0; 370} 371 372// Front-ends for LowerCaseEqualsASCII. 373bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 374 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 375} 376 377bool LowerCaseEqualsASCII(const string16& a, const char* b) { 378 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 379} 380 381bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 382 std::string::const_iterator a_end, 383 const char* b) { 384 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 385} 386 387bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 388 string16::const_iterator a_end, 389 const char* b) { 390 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 391} 392 393// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 
394#if !defined(OS_ANDROID) 395bool LowerCaseEqualsASCII(const char* a_begin, 396 const char* a_end, 397 const char* b) { 398 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 399} 400 401bool LowerCaseEqualsASCII(const char16* a_begin, 402 const char16* a_end, 403 const char* b) { 404 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 405} 406 407#endif // !defined(OS_ANDROID) 408 409bool EqualsASCII(const string16& a, const base::StringPiece& b) { 410 if (a.length() != b.length()) 411 return false; 412 return std::equal(b.begin(), b.end(), a.begin()); 413} 414 415bool StartsWithASCII(const std::string& str, 416 const std::string& search, 417 bool case_sensitive) { 418 if (case_sensitive) 419 return str.compare(0, search.length(), search) == 0; 420 else 421 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 422} 423 424template <typename STR> 425bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 426 if (case_sensitive) { 427 return str.compare(0, search.length(), search) == 0; 428 } else { 429 if (search.size() > str.size()) 430 return false; 431 return std::equal(search.begin(), search.end(), str.begin(), 432 base::CaseInsensitiveCompare<typename STR::value_type>()); 433 } 434} 435 436bool StartsWith(const string16& str, const string16& search, 437 bool case_sensitive) { 438 return StartsWithT(str, search, case_sensitive); 439} 440 441template <typename STR> 442bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 443 typename STR::size_type str_length = str.length(); 444 typename STR::size_type search_length = search.length(); 445 if (search_length > str_length) 446 return false; 447 if (case_sensitive) { 448 return str.compare(str_length - search_length, search_length, search) == 0; 449 } else { 450 return std::equal(search.begin(), search.end(), 451 str.begin() + (str_length - search_length), 452 base::CaseInsensitiveCompare<typename STR::value_type>()); 453 } 454} 455 456bool EndsWith(const 
std::string& str, const std::string& search, 457 bool case_sensitive) { 458 return EndsWithT(str, search, case_sensitive); 459} 460 461bool EndsWith(const string16& str, const string16& search, 462 bool case_sensitive) { 463 return EndsWithT(str, search, case_sensitive); 464} 465 466static const char* const kByteStringsUnlocalized[] = { 467 " B", 468 " kB", 469 " MB", 470 " GB", 471 " TB", 472 " PB" 473}; 474 475string16 FormatBytesUnlocalized(int64 bytes) { 476 double unit_amount = static_cast<double>(bytes); 477 size_t dimension = 0; 478 const int kKilo = 1024; 479 while (unit_amount >= kKilo && 480 dimension < arraysize(kByteStringsUnlocalized) - 1) { 481 unit_amount /= kKilo; 482 dimension++; 483 } 484 485 char buf[64]; 486 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 487 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 488 kByteStringsUnlocalized[dimension]); 489 } else { 490 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 491 kByteStringsUnlocalized[dimension]); 492 } 493 494 return base::ASCIIToUTF16(buf); 495} 496 497template<class StringType> 498void DoReplaceSubstringsAfterOffset(StringType* str, 499 typename StringType::size_type start_offset, 500 const StringType& find_this, 501 const StringType& replace_with, 502 bool replace_all) { 503 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 504 return; 505 506 DCHECK(!find_this.empty()); 507 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 508 offs != StringType::npos; offs = str->find(find_this, offs)) { 509 str->replace(offs, find_this.length(), replace_with); 510 offs += replace_with.length(); 511 512 if (!replace_all) 513 break; 514 } 515} 516 517void ReplaceFirstSubstringAfterOffset(string16* str, 518 string16::size_type start_offset, 519 const string16& find_this, 520 const string16& replace_with) { 521 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 522 false); // replace first instance 
523} 524 525void ReplaceFirstSubstringAfterOffset(std::string* str, 526 std::string::size_type start_offset, 527 const std::string& find_this, 528 const std::string& replace_with) { 529 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 530 false); // replace first instance 531} 532 533void ReplaceSubstringsAfterOffset(string16* str, 534 string16::size_type start_offset, 535 const string16& find_this, 536 const string16& replace_with) { 537 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 538 true); // replace all instances 539} 540 541void ReplaceSubstringsAfterOffset(std::string* str, 542 std::string::size_type start_offset, 543 const std::string& find_this, 544 const std::string& replace_with) { 545 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 546 true); // replace all instances 547} 548 549 550template<typename STR> 551static size_t TokenizeT(const STR& str, 552 const STR& delimiters, 553 std::vector<STR>* tokens) { 554 tokens->clear(); 555 556 typename STR::size_type start = str.find_first_not_of(delimiters); 557 while (start != STR::npos) { 558 typename STR::size_type end = str.find_first_of(delimiters, start + 1); 559 if (end == STR::npos) { 560 tokens->push_back(str.substr(start)); 561 break; 562 } else { 563 tokens->push_back(str.substr(start, end - start)); 564 start = str.find_first_not_of(delimiters, end + 1); 565 } 566 } 567 568 return tokens->size(); 569} 570 571size_t Tokenize(const string16& str, 572 const string16& delimiters, 573 std::vector<string16>* tokens) { 574 return TokenizeT(str, delimiters, tokens); 575} 576 577size_t Tokenize(const std::string& str, 578 const std::string& delimiters, 579 std::vector<std::string>* tokens) { 580 return TokenizeT(str, delimiters, tokens); 581} 582 583size_t Tokenize(const base::StringPiece& str, 584 const base::StringPiece& delimiters, 585 std::vector<base::StringPiece>* tokens) { 586 return TokenizeT(str, delimiters, tokens); 
587} 588 589template<typename STR> 590static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 591 if (parts.empty()) 592 return STR(); 593 594 STR result(parts[0]); 595 typename std::vector<STR>::const_iterator iter = parts.begin(); 596 ++iter; 597 598 for (; iter != parts.end(); ++iter) { 599 result += sep; 600 result += *iter; 601 } 602 603 return result; 604} 605 606std::string JoinString(const std::vector<std::string>& parts, char sep) { 607 return JoinStringT(parts, std::string(1, sep)); 608} 609 610string16 JoinString(const std::vector<string16>& parts, char16 sep) { 611 return JoinStringT(parts, string16(1, sep)); 612} 613 614std::string JoinString(const std::vector<std::string>& parts, 615 const std::string& separator) { 616 return JoinStringT(parts, separator); 617} 618 619string16 JoinString(const std::vector<string16>& parts, 620 const string16& separator) { 621 return JoinStringT(parts, separator); 622} 623 624template<class FormatStringType, class OutStringType> 625OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 626 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 627 size_t substitutions = subst.size(); 628 629 size_t sub_length = 0; 630 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 631 iter != subst.end(); ++iter) { 632 sub_length += iter->length(); 633 } 634 635 OutStringType formatted; 636 formatted.reserve(format_string.length() + sub_length); 637 638 std::vector<ReplacementOffset> r_offsets; 639 for (typename FormatStringType::const_iterator i = format_string.begin(); 640 i != format_string.end(); ++i) { 641 if ('$' == *i) { 642 if (i + 1 != format_string.end()) { 643 ++i; 644 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 645 if ('$' == *i) { 646 while (i != format_string.end() && '$' == *i) { 647 formatted.push_back('$'); 648 ++i; 649 } 650 --i; 651 } else { 652 uintptr_t index = 0; 653 while (i != format_string.end() && 
'0' <= *i && *i <= '9') { 654 index *= 10; 655 index += *i - '0'; 656 ++i; 657 } 658 --i; 659 index -= 1; 660 if (offsets) { 661 ReplacementOffset r_offset(index, 662 static_cast<int>(formatted.size())); 663 r_offsets.insert(std::lower_bound(r_offsets.begin(), 664 r_offsets.end(), 665 r_offset, 666 &CompareParameter), 667 r_offset); 668 } 669 if (index < substitutions) 670 formatted.append(subst.at(index)); 671 } 672 } 673 } else { 674 formatted.push_back(*i); 675 } 676 } 677 if (offsets) { 678 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 679 i != r_offsets.end(); ++i) { 680 offsets->push_back(i->offset); 681 } 682 } 683 return formatted; 684} 685 686string16 ReplaceStringPlaceholders(const string16& format_string, 687 const std::vector<string16>& subst, 688 std::vector<size_t>* offsets) { 689 return DoReplaceStringPlaceholders(format_string, subst, offsets); 690} 691 692std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 693 const std::vector<std::string>& subst, 694 std::vector<size_t>* offsets) { 695 return DoReplaceStringPlaceholders(format_string, subst, offsets); 696} 697 698string16 ReplaceStringPlaceholders(const string16& format_string, 699 const string16& a, 700 size_t* offset) { 701 std::vector<size_t> offsets; 702 std::vector<string16> subst; 703 subst.push_back(a); 704 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 705 706 DCHECK_EQ(1U, offsets.size()); 707 if (offset) 708 *offset = offsets[0]; 709 return result; 710} 711 712static bool IsWildcard(base_icu::UChar32 character) { 713 return character == '*' || character == '?'; 714} 715 716// Move the strings pointers to the point where they start to differ. 
717template <typename CHAR, typename NEXT> 718static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 719 const CHAR** string, const CHAR* string_end, 720 NEXT next) { 721 const CHAR* escape = NULL; 722 while (*pattern != pattern_end && *string != string_end) { 723 if (!escape && IsWildcard(**pattern)) { 724 // We don't want to match wildcard here, except if it's escaped. 725 return; 726 } 727 728 // Check if the escapement char is found. If so, skip it and move to the 729 // next character. 730 if (!escape && **pattern == '\\') { 731 escape = *pattern; 732 next(pattern, pattern_end); 733 continue; 734 } 735 736 // Check if the chars match, if so, increment the ptrs. 737 const CHAR* pattern_next = *pattern; 738 const CHAR* string_next = *string; 739 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 740 if (pattern_char == next(&string_next, string_end) && 741 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 742 *pattern = pattern_next; 743 *string = string_next; 744 } else { 745 // Uh ho, it did not match, we are done. If the last char was an 746 // escapement, that means that it was an error to advance the ptr here, 747 // let's put it back where it was. This also mean that the MatchPattern 748 // function will return false because if we can't match an escape char 749 // here, then no one will. 750 if (escape) { 751 *pattern = escape; 752 } 753 return; 754 } 755 756 escape = NULL; 757 } 758} 759 760template <typename CHAR, typename NEXT> 761static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 762 while (*pattern != end) { 763 if (!IsWildcard(**pattern)) 764 return; 765 next(pattern, end); 766 } 767} 768 769template <typename CHAR, typename NEXT> 770static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 771 const CHAR* pattern, const CHAR* pattern_end, 772 int depth, 773 NEXT next) { 774 const int kMaxDepth = 16; 775 if (depth > kMaxDepth) 776 return false; 777 778 // Eat all the matching chars. 
779 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 780 781 // If the string is empty, then the pattern must be empty too, or contains 782 // only wildcards. 783 if (eval == eval_end) { 784 EatWildcard(&pattern, pattern_end, next); 785 return pattern == pattern_end; 786 } 787 788 // Pattern is empty but not string, this is not a match. 789 if (pattern == pattern_end) 790 return false; 791 792 // If this is a question mark, then we need to compare the rest with 793 // the current string or the string with one character eaten. 794 const CHAR* next_pattern = pattern; 795 next(&next_pattern, pattern_end); 796 if (pattern[0] == '?') { 797 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 798 depth + 1, next)) 799 return true; 800 const CHAR* next_eval = eval; 801 next(&next_eval, eval_end); 802 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 803 depth + 1, next)) 804 return true; 805 } 806 807 // This is a *, try to match all the possible substrings with the remainder 808 // of the pattern. 809 if (pattern[0] == '*') { 810 // Collapse duplicate wild cards (********** into *) so that the 811 // method does not recurse unnecessarily. http://crbug.com/52839 812 EatWildcard(&next_pattern, pattern_end, next); 813 814 while (eval != eval_end) { 815 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 816 depth + 1, next)) 817 return true; 818 eval++; 819 } 820 821 // We reached the end of the string, let see if the pattern contains only 822 // wildcards. 
823 if (eval == eval_end) { 824 EatWildcard(&pattern, pattern_end, next); 825 if (pattern != pattern_end) 826 return false; 827 return true; 828 } 829 } 830 831 return false; 832} 833 834struct NextCharUTF8 { 835 base_icu::UChar32 operator()(const char** p, const char* end) { 836 base_icu::UChar32 c; 837 int offset = 0; 838 CBU8_NEXT(*p, offset, end - *p, c); 839 *p += offset; 840 return c; 841 } 842}; 843 844struct NextCharUTF16 { 845 base_icu::UChar32 operator()(const char16** p, const char16* end) { 846 base_icu::UChar32 c; 847 int offset = 0; 848 CBU16_NEXT(*p, offset, end - *p, c); 849 *p += offset; 850 return c; 851 } 852}; 853 854bool MatchPattern(const base::StringPiece& eval, 855 const base::StringPiece& pattern) { 856 return MatchPatternT(eval.data(), eval.data() + eval.size(), 857 pattern.data(), pattern.data() + pattern.size(), 858 0, NextCharUTF8()); 859} 860 861bool MatchPattern(const string16& eval, const string16& pattern) { 862 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 863 pattern.c_str(), pattern.c_str() + pattern.size(), 864 0, NextCharUTF16()); 865} 866 867// The following code is compatible with the OpenBSD lcpy interface. See: 868// http://www.gratisoft.us/todd/papers/strlcpy.html 869// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 870 871namespace { 872 873template <typename CHAR> 874size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 875 for (size_t i = 0; i < dst_size; ++i) { 876 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 877 return i; 878 } 879 880 // We were left off at dst_size. We over copied 1 byte. Null terminate. 881 if (dst_size != 0) 882 dst[dst_size - 1] = 0; 883 884 // Count the rest of the |src|, and return it's length in characters. 
885 while (src[dst_size]) ++dst_size; 886 return dst_size; 887} 888 889} // namespace 890 891size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 892 return lcpyT<char>(dst, src, dst_size); 893} 894size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 895 return lcpyT<wchar_t>(dst, src, dst_size); 896} 897