string_util.cc revision f2477e01787aa58f445919b809d89e252beef54f
1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <ctype.h> 8#include <errno.h> 9#include <math.h> 10#include <stdarg.h> 11#include <stdio.h> 12#include <stdlib.h> 13#include <string.h> 14#include <time.h> 15#include <wchar.h> 16#include <wctype.h> 17 18#include <algorithm> 19#include <vector> 20 21#include "base/basictypes.h" 22#include "base/logging.h" 23#include "base/memory/singleton.h" 24#include "base/strings/utf_string_conversion_utils.h" 25#include "base/strings/utf_string_conversions.h" 26#include "base/third_party/icu/icu_utf.h" 27#include "build/build_config.h" 28 29namespace { 30 31// Force the singleton used by Empty[W]String[16] to be a unique type. This 32// prevents other code that might accidentally use Singleton<string> from 33// getting our internal one. 34struct EmptyStrings { 35 EmptyStrings() {} 36 const std::string s; 37 const std::wstring ws; 38 const string16 s16; 39 40 static EmptyStrings* GetInstance() { 41 return Singleton<EmptyStrings>::get(); 42 } 43}; 44 45// Used by ReplaceStringPlaceholders to track the position in the string of 46// replaced parameters. 47struct ReplacementOffset { 48 ReplacementOffset(uintptr_t parameter, size_t offset) 49 : parameter(parameter), 50 offset(offset) {} 51 52 // Index of the parameter. 53 uintptr_t parameter; 54 55 // Starting position in the string. 56 size_t offset; 57}; 58 59static bool CompareParameter(const ReplacementOffset& elem1, 60 const ReplacementOffset& elem2) { 61 return elem1.parameter < elem2.parameter; 62} 63 64} // namespace 65 66namespace base { 67 68bool IsWprintfFormatPortable(const wchar_t* format) { 69 for (const wchar_t* position = format; *position != '\0'; ++position) { 70 if (*position == '%') { 71 bool in_specification = true; 72 bool modifier_l = false; 73 while (in_specification) { 74 // Eat up characters until reaching a known specifier. 75 if (*++position == '\0') { 76 // The format string ended in the middle of a specification. Call 77 // it portable because no unportable specifications were found. The 78 // string is equally broken on all platforms. 79 return true; 80 } 81 82 if (*position == 'l') { 83 // 'l' is the only thing that can save the 's' and 'c' specifiers. 84 modifier_l = true; 85 } else if (((*position == 's' || *position == 'c') && !modifier_l) || 86 *position == 'S' || *position == 'C' || *position == 'F' || 87 *position == 'D' || *position == 'O' || *position == 'U') { 88 // Not portable. 89 return false; 90 } 91 92 if (wcschr(L"diouxXeEfgGaAcspn%", *position)) { 93 // Portable, keep scanning the rest of the format string. 94 in_specification = false; 95 } 96 } 97 } 98 } 99 100 return true; 101} 102 103} // namespace base 104 105 106const std::string& EmptyString() { 107 return EmptyStrings::GetInstance()->s; 108} 109 110const std::wstring& EmptyWString() { 111 return EmptyStrings::GetInstance()->ws; 112} 113 114const string16& EmptyString16() { 115 return EmptyStrings::GetInstance()->s16; 116} 117 118template<typename STR> 119bool ReplaceCharsT(const STR& input, 120 const typename STR::value_type replace_chars[], 121 const STR& replace_with, 122 STR* output) { 123 bool removed = false; 124 size_t replace_length = replace_with.length(); 125 126 *output = input; 127 128 size_t found = output->find_first_of(replace_chars); 129 while (found != STR::npos) { 130 removed = true; 131 output->replace(found, 1, replace_with); 132 found = output->find_first_of(replace_chars, found + replace_length); 133 } 134 135 return removed; 136} 137 138bool ReplaceChars(const string16& input, 139 const char16 replace_chars[], 140 const string16& replace_with, 141 string16* output) { 142 return ReplaceCharsT(input, replace_chars, replace_with, output); 143} 144 145bool ReplaceChars(const std::string& input, 146 const char replace_chars[], 147 const std::string& replace_with, 148 std::string* output) { 149 return ReplaceCharsT(input, replace_chars, replace_with, output); 150} 151 152bool RemoveChars(const string16& input, 153 const char16 remove_chars[], 154 string16* output) { 155 return ReplaceChars(input, remove_chars, string16(), output); 156} 157 158bool RemoveChars(const std::string& input, 159 const char remove_chars[], 160 std::string* output) { 161 return ReplaceChars(input, remove_chars, std::string(), output); 162} 163 164template<typename STR> 165TrimPositions TrimStringT(const STR& input, 166 const typename STR::value_type trim_chars[], 167 TrimPositions positions, 168 STR* output) { 169 // Find the edges of leading/trailing whitespace as desired. 170 const typename STR::size_type last_char = input.length() - 1; 171 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ? 172 input.find_first_not_of(trim_chars) : 0; 173 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ? 174 input.find_last_not_of(trim_chars) : last_char; 175 176 // When the string was all whitespace, report that we stripped off whitespace 177 // from whichever position the caller was interested in. For empty input, we 178 // stripped no whitespace, but we still need to clear |output|. 179 if (input.empty() || 180 (first_good_char == STR::npos) || (last_good_char == STR::npos)) { 181 bool input_was_empty = input.empty(); // in case output == &input 182 output->clear(); 183 return input_was_empty ? TRIM_NONE : positions; 184 } 185 186 // Trim the whitespace. 187 *output = 188 input.substr(first_good_char, last_good_char - first_good_char + 1); 189 190 // Return where we trimmed from. 191 return static_cast<TrimPositions>( 192 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) | 193 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING)); 194} 195 196bool TrimString(const string16& input, 197 const char16 trim_chars[], 198 string16* output) { 199 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 200} 201 202bool TrimString(const std::string& input, 203 const char trim_chars[], 204 std::string* output) { 205 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE; 206} 207 208void TruncateUTF8ToByteSize(const std::string& input, 209 const size_t byte_size, 210 std::string* output) { 211 DCHECK(output); 212 if (byte_size > input.length()) { 213 *output = input; 214 return; 215 } 216 DCHECK_LE(byte_size, static_cast<uint32>(kint32max)); 217 // Note: This cast is necessary because CBU8_NEXT uses int32s. 218 int32 truncation_length = static_cast<int32>(byte_size); 219 int32 char_index = truncation_length - 1; 220 const char* data = input.data(); 221 222 // Using CBU8, we will move backwards from the truncation point 223 // to the beginning of the string looking for a valid UTF8 224 // character. Once a full UTF8 character is found, we will 225 // truncate the string to the end of that character. 226 while (char_index >= 0) { 227 int32 prev = char_index; 228 uint32 code_point = 0; 229 CBU8_NEXT(data, char_index, truncation_length, code_point); 230 if (!base::IsValidCharacter(code_point) || 231 !base::IsValidCodepoint(code_point)) { 232 char_index = prev - 1; 233 } else { 234 break; 235 } 236 } 237 238 if (char_index >= 0 ) 239 *output = input.substr(0, char_index); 240 else 241 output->clear(); 242} 243 244TrimPositions TrimWhitespace(const string16& input, 245 TrimPositions positions, 246 string16* output) { 247 return TrimStringT(input, kWhitespaceUTF16, positions, output); 248} 249 250TrimPositions TrimWhitespaceASCII(const std::string& input, 251 TrimPositions positions, 252 std::string* output) { 253 return TrimStringT(input, kWhitespaceASCII, positions, output); 254} 255 256// This function is only for backward-compatibility. 257// To be removed when all callers are updated. 258TrimPositions TrimWhitespace(const std::string& input, 259 TrimPositions positions, 260 std::string* output) { 261 return TrimWhitespaceASCII(input, positions, output); 262} 263 264template<typename STR> 265STR CollapseWhitespaceT(const STR& text, 266 bool trim_sequences_with_line_breaks) { 267 STR result; 268 result.resize(text.size()); 269 270 // Set flags to pretend we're already in a trimmed whitespace sequence, so we 271 // will trim any leading whitespace. 272 bool in_whitespace = true; 273 bool already_trimmed = true; 274 275 int chars_written = 0; 276 for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) { 277 if (IsWhitespace(*i)) { 278 if (!in_whitespace) { 279 // Reduce all whitespace sequences to a single space. 280 in_whitespace = true; 281 result[chars_written++] = L' '; 282 } 283 if (trim_sequences_with_line_breaks && !already_trimmed && 284 ((*i == '\n') || (*i == '\r'))) { 285 // Whitespace sequences containing CR or LF are eliminated entirely. 286 already_trimmed = true; 287 --chars_written; 288 } 289 } else { 290 // Non-whitespace chracters are copied straight across. 291 in_whitespace = false; 292 already_trimmed = false; 293 result[chars_written++] = *i; 294 } 295 } 296 297 if (in_whitespace && !already_trimmed) { 298 // Any trailing whitespace is eliminated. 299 --chars_written; 300 } 301 302 result.resize(chars_written); 303 return result; 304} 305 306string16 CollapseWhitespace(const string16& text, 307 bool trim_sequences_with_line_breaks) { 308 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 309} 310 311std::string CollapseWhitespaceASCII(const std::string& text, 312 bool trim_sequences_with_line_breaks) { 313 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks); 314} 315 316bool ContainsOnlyWhitespaceASCII(const std::string& str) { 317 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) { 318 if (!IsAsciiWhitespace(*i)) 319 return false; 320 } 321 return true; 322} 323 324bool ContainsOnlyWhitespace(const string16& str) { 325 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos; 326} 327 328template<typename STR> 329static bool ContainsOnlyCharsT(const STR& input, const STR& characters) { 330 for (typename STR::const_iterator iter = input.begin(); 331 iter != input.end(); ++iter) { 332 if (characters.find(*iter) == STR::npos) 333 return false; 334 } 335 return true; 336} 337 338bool ContainsOnlyChars(const string16& input, const string16& characters) { 339 return ContainsOnlyCharsT(input, characters); 340} 341 342bool ContainsOnlyChars(const std::string& input, 343 const std::string& characters) { 344 return ContainsOnlyCharsT(input, characters); 345} 346 347#if !defined(WCHAR_T_IS_UTF16) 348bool IsStringASCII(const std::wstring& str); 349#endif 350 351std::string WideToASCII(const std::wstring& wide) { 352 DCHECK(IsStringASCII(wide)) << wide; 353 return std::string(wide.begin(), wide.end()); 354} 355 356std::string UTF16ToASCII(const string16& utf16) { 357 DCHECK(IsStringASCII(utf16)) << utf16; 358 return std::string(utf16.begin(), utf16.end()); 359} 360 361template<class STR> 362static bool DoIsStringASCII(const STR& str) { 363 for (size_t i = 0; i < str.length(); i++) { 364 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i]; 365 if (c > 0x7F) 366 return false; 367 } 368 return true; 369} 370 371#if !defined(WCHAR_T_IS_UTF16) 372bool IsStringASCII(const std::wstring& str) { 373 return DoIsStringASCII(str); 374} 375#endif 376 377bool IsStringASCII(const string16& str) { 378 return DoIsStringASCII(str); 379} 380 381bool IsStringASCII(const base::StringPiece& str) { 382 return DoIsStringASCII(str); 383} 384 385bool IsStringUTF8(const std::string& str) { 386 const char *src = str.data(); 387 int32 src_len = static_cast<int32>(str.length()); 388 int32 char_index = 0; 389 390 while (char_index < src_len) { 391 int32 code_point; 392 CBU8_NEXT(src, char_index, src_len, code_point); 393 if (!base::IsValidCharacter(code_point)) 394 return false; 395 } 396 return true; 397} 398 399template<typename Iter> 400static inline bool DoLowerCaseEqualsASCII(Iter a_begin, 401 Iter a_end, 402 const char* b) { 403 for (Iter it = a_begin; it != a_end; ++it, ++b) { 404 if (!*b || base::ToLowerASCII(*it) != *b) 405 return false; 406 } 407 return *b == 0; 408} 409 410// Front-ends for LowerCaseEqualsASCII. 411bool LowerCaseEqualsASCII(const std::string& a, const char* b) { 412 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 413} 414 415bool LowerCaseEqualsASCII(const string16& a, const char* b) { 416 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b); 417} 418 419bool LowerCaseEqualsASCII(std::string::const_iterator a_begin, 420 std::string::const_iterator a_end, 421 const char* b) { 422 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 423} 424 425bool LowerCaseEqualsASCII(string16::const_iterator a_begin, 426 string16::const_iterator a_end, 427 const char* b) { 428 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 429} 430 431// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here. 432#if !defined(OS_ANDROID) 433bool LowerCaseEqualsASCII(const char* a_begin, 434 const char* a_end, 435 const char* b) { 436 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 437} 438 439bool LowerCaseEqualsASCII(const char16* a_begin, 440 const char16* a_end, 441 const char* b) { 442 return DoLowerCaseEqualsASCII(a_begin, a_end, b); 443} 444 445#endif // !defined(OS_ANDROID) 446 447bool EqualsASCII(const string16& a, const base::StringPiece& b) { 448 if (a.length() != b.length()) 449 return false; 450 return std::equal(b.begin(), b.end(), a.begin()); 451} 452 453bool StartsWithASCII(const std::string& str, 454 const std::string& search, 455 bool case_sensitive) { 456 if (case_sensitive) 457 return str.compare(0, search.length(), search) == 0; 458 else 459 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0; 460} 461 462template <typename STR> 463bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) { 464 if (case_sensitive) { 465 return str.compare(0, search.length(), search) == 0; 466 } else { 467 if (search.size() > str.size()) 468 return false; 469 return std::equal(search.begin(), search.end(), str.begin(), 470 base::CaseInsensitiveCompare<typename STR::value_type>()); 471 } 472} 473 474bool StartsWith(const string16& str, const string16& search, 475 bool case_sensitive) { 476 return StartsWithT(str, search, case_sensitive); 477} 478 479template <typename STR> 480bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) { 481 typename STR::size_type str_length = str.length(); 482 typename STR::size_type search_length = search.length(); 483 if (search_length > str_length) 484 return false; 485 if (case_sensitive) { 486 return str.compare(str_length - search_length, search_length, search) == 0; 487 } else { 488 return std::equal(search.begin(), search.end(), 489 str.begin() + (str_length - search_length), 490 base::CaseInsensitiveCompare<typename STR::value_type>()); 491 } 492} 493 494bool EndsWith(const std::string& str, const std::string& search, 495 bool case_sensitive) { 496 return EndsWithT(str, search, case_sensitive); 497} 498 499bool EndsWith(const string16& str, const string16& search, 500 bool case_sensitive) { 501 return EndsWithT(str, search, case_sensitive); 502} 503 504static const char* const kByteStringsUnlocalized[] = { 505 " B", 506 " kB", 507 " MB", 508 " GB", 509 " TB", 510 " PB" 511}; 512 513string16 FormatBytesUnlocalized(int64 bytes) { 514 double unit_amount = static_cast<double>(bytes); 515 size_t dimension = 0; 516 const int kKilo = 1024; 517 while (unit_amount >= kKilo && 518 dimension < arraysize(kByteStringsUnlocalized) - 1) { 519 unit_amount /= kKilo; 520 dimension++; 521 } 522 523 char buf[64]; 524 if (bytes != 0 && dimension > 0 && unit_amount < 100) { 525 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount, 526 kByteStringsUnlocalized[dimension]); 527 } else { 528 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount, 529 kByteStringsUnlocalized[dimension]); 530 } 531 532 return ASCIIToUTF16(buf); 533} 534 535template<class StringType> 536void DoReplaceSubstringsAfterOffset(StringType* str, 537 typename StringType::size_type start_offset, 538 const StringType& find_this, 539 const StringType& replace_with, 540 bool replace_all) { 541 if ((start_offset == StringType::npos) || (start_offset >= str->length())) 542 return; 543 544 DCHECK(!find_this.empty()); 545 for (typename StringType::size_type offs(str->find(find_this, start_offset)); 546 offs != StringType::npos; offs = str->find(find_this, offs)) { 547 str->replace(offs, find_this.length(), replace_with); 548 offs += replace_with.length(); 549 550 if (!replace_all) 551 break; 552 } 553} 554 555void ReplaceFirstSubstringAfterOffset(string16* str, 556 string16::size_type start_offset, 557 const string16& find_this, 558 const string16& replace_with) { 559 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 560 false); // replace first instance 561} 562 563void ReplaceFirstSubstringAfterOffset(std::string* str, 564 std::string::size_type start_offset, 565 const std::string& find_this, 566 const std::string& replace_with) { 567 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 568 false); // replace first instance 569} 570 571void ReplaceSubstringsAfterOffset(string16* str, 572 string16::size_type start_offset, 573 const string16& find_this, 574 const string16& replace_with) { 575 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 576 true); // replace all instances 577} 578 579void ReplaceSubstringsAfterOffset(std::string* str, 580 std::string::size_type start_offset, 581 const std::string& find_this, 582 const std::string& replace_with) { 583 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with, 584 true); // replace all instances 585} 586 587 588template<typename STR> 589static size_t TokenizeT(const STR& str, 590 const STR& delimiters, 591 std::vector<STR>* tokens) { 592 tokens->clear(); 593 594 typename STR::size_type start = str.find_first_not_of(delimiters); 595 while (start != STR::npos) { 596 typename STR::size_type end = str.find_first_of(delimiters, start + 1); 597 if (end == STR::npos) { 598 tokens->push_back(str.substr(start)); 599 break; 600 } else { 601 tokens->push_back(str.substr(start, end - start)); 602 start = str.find_first_not_of(delimiters, end + 1); 603 } 604 } 605 606 return tokens->size(); 607} 608 609size_t Tokenize(const string16& str, 610 const string16& delimiters, 611 std::vector<string16>* tokens) { 612 return TokenizeT(str, delimiters, tokens); 613} 614 615size_t Tokenize(const std::string& str, 616 const std::string& delimiters, 617 std::vector<std::string>* tokens) { 618 return TokenizeT(str, delimiters, tokens); 619} 620 621size_t Tokenize(const base::StringPiece& str, 622 const base::StringPiece& delimiters, 623 std::vector<base::StringPiece>* tokens) { 624 return TokenizeT(str, delimiters, tokens); 625} 626 627template<typename STR> 628static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) { 629 if (parts.empty()) 630 return STR(); 631 632 STR result(parts[0]); 633 typename std::vector<STR>::const_iterator iter = parts.begin(); 634 ++iter; 635 636 for (; iter != parts.end(); ++iter) { 637 result += sep; 638 result += *iter; 639 } 640 641 return result; 642} 643 644std::string JoinString(const std::vector<std::string>& parts, char sep) { 645 return JoinStringT(parts, std::string(1, sep)); 646} 647 648string16 JoinString(const std::vector<string16>& parts, char16 sep) { 649 return JoinStringT(parts, string16(1, sep)); 650} 651 652std::string JoinString(const std::vector<std::string>& parts, 653 const std::string& separator) { 654 return JoinStringT(parts, separator); 655} 656 657string16 JoinString(const std::vector<string16>& parts, 658 const string16& separator) { 659 return JoinStringT(parts, separator); 660} 661 662template<class FormatStringType, class OutStringType> 663OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string, 664 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) { 665 size_t substitutions = subst.size(); 666 667 size_t sub_length = 0; 668 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin(); 669 iter != subst.end(); ++iter) { 670 sub_length += iter->length(); 671 } 672 673 OutStringType formatted; 674 formatted.reserve(format_string.length() + sub_length); 675 676 std::vector<ReplacementOffset> r_offsets; 677 for (typename FormatStringType::const_iterator i = format_string.begin(); 678 i != format_string.end(); ++i) { 679 if ('$' == *i) { 680 if (i + 1 != format_string.end()) { 681 ++i; 682 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i; 683 if ('$' == *i) { 684 while (i != format_string.end() && '$' == *i) { 685 formatted.push_back('$'); 686 ++i; 687 } 688 --i; 689 } else { 690 uintptr_t index = 0; 691 while (i != format_string.end() && '0' <= *i && *i <= '9') { 692 index *= 10; 693 index += *i - '0'; 694 ++i; 695 } 696 --i; 697 index -= 1; 698 if (offsets) { 699 ReplacementOffset r_offset(index, 700 static_cast<int>(formatted.size())); 701 r_offsets.insert(std::lower_bound(r_offsets.begin(), 702 r_offsets.end(), 703 r_offset, 704 &CompareParameter), 705 r_offset); 706 } 707 if (index < substitutions) 708 formatted.append(subst.at(index)); 709 } 710 } 711 } else { 712 formatted.push_back(*i); 713 } 714 } 715 if (offsets) { 716 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin(); 717 i != r_offsets.end(); ++i) { 718 offsets->push_back(i->offset); 719 } 720 } 721 return formatted; 722} 723 724string16 ReplaceStringPlaceholders(const string16& format_string, 725 const std::vector<string16>& subst, 726 std::vector<size_t>* offsets) { 727 return DoReplaceStringPlaceholders(format_string, subst, offsets); 728} 729 730std::string ReplaceStringPlaceholders(const base::StringPiece& format_string, 731 const std::vector<std::string>& subst, 732 std::vector<size_t>* offsets) { 733 return DoReplaceStringPlaceholders(format_string, subst, offsets); 734} 735 736string16 ReplaceStringPlaceholders(const string16& format_string, 737 const string16& a, 738 size_t* offset) { 739 std::vector<size_t> offsets; 740 std::vector<string16> subst; 741 subst.push_back(a); 742 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets); 743 744 DCHECK_EQ(1U, offsets.size()); 745 if (offset) 746 *offset = offsets[0]; 747 return result; 748} 749 750static bool IsWildcard(base_icu::UChar32 character) { 751 return character == '*' || character == '?'; 752} 753 754// Move the strings pointers to the point where they start to differ. 755template <typename CHAR, typename NEXT> 756static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, 757 const CHAR** string, const CHAR* string_end, 758 NEXT next) { 759 const CHAR* escape = NULL; 760 while (*pattern != pattern_end && *string != string_end) { 761 if (!escape && IsWildcard(**pattern)) { 762 // We don't want to match wildcard here, except if it's escaped. 763 return; 764 } 765 766 // Check if the escapement char is found. If so, skip it and move to the 767 // next character. 768 if (!escape && **pattern == '\\') { 769 escape = *pattern; 770 next(pattern, pattern_end); 771 continue; 772 } 773 774 // Check if the chars match, if so, increment the ptrs. 775 const CHAR* pattern_next = *pattern; 776 const CHAR* string_next = *string; 777 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end); 778 if (pattern_char == next(&string_next, string_end) && 779 pattern_char != (base_icu::UChar32) CBU_SENTINEL) { 780 *pattern = pattern_next; 781 *string = string_next; 782 } else { 783 // Uh ho, it did not match, we are done. If the last char was an 784 // escapement, that means that it was an error to advance the ptr here, 785 // let's put it back where it was. This also mean that the MatchPattern 786 // function will return false because if we can't match an escape char 787 // here, then no one will. 788 if (escape) { 789 *pattern = escape; 790 } 791 return; 792 } 793 794 escape = NULL; 795 } 796} 797 798template <typename CHAR, typename NEXT> 799static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { 800 while (*pattern != end) { 801 if (!IsWildcard(**pattern)) 802 return; 803 next(pattern, end); 804 } 805} 806 807template <typename CHAR, typename NEXT> 808static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, 809 const CHAR* pattern, const CHAR* pattern_end, 810 int depth, 811 NEXT next) { 812 const int kMaxDepth = 16; 813 if (depth > kMaxDepth) 814 return false; 815 816 // Eat all the matching chars. 817 EatSameChars(&pattern, pattern_end, &eval, eval_end, next); 818 819 // If the string is empty, then the pattern must be empty too, or contains 820 // only wildcards. 821 if (eval == eval_end) { 822 EatWildcard(&pattern, pattern_end, next); 823 return pattern == pattern_end; 824 } 825 826 // Pattern is empty but not string, this is not a match. 827 if (pattern == pattern_end) 828 return false; 829 830 // If this is a question mark, then we need to compare the rest with 831 // the current string or the string with one character eaten. 832 const CHAR* next_pattern = pattern; 833 next(&next_pattern, pattern_end); 834 if (pattern[0] == '?') { 835 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 836 depth + 1, next)) 837 return true; 838 const CHAR* next_eval = eval; 839 next(&next_eval, eval_end); 840 if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, 841 depth + 1, next)) 842 return true; 843 } 844 845 // This is a *, try to match all the possible substrings with the remainder 846 // of the pattern. 847 if (pattern[0] == '*') { 848 // Collapse duplicate wild cards (********** into *) so that the 849 // method does not recurse unnecessarily. http://crbug.com/52839 850 EatWildcard(&next_pattern, pattern_end, next); 851 852 while (eval != eval_end) { 853 if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, 854 depth + 1, next)) 855 return true; 856 eval++; 857 } 858 859 // We reached the end of the string, let see if the pattern contains only 860 // wildcards. 861 if (eval == eval_end) { 862 EatWildcard(&pattern, pattern_end, next); 863 if (pattern != pattern_end) 864 return false; 865 return true; 866 } 867 } 868 869 return false; 870} 871 872struct NextCharUTF8 { 873 base_icu::UChar32 operator()(const char** p, const char* end) { 874 base_icu::UChar32 c; 875 int offset = 0; 876 CBU8_NEXT(*p, offset, end - *p, c); 877 *p += offset; 878 return c; 879 } 880}; 881 882struct NextCharUTF16 { 883 base_icu::UChar32 operator()(const char16** p, const char16* end) { 884 base_icu::UChar32 c; 885 int offset = 0; 886 CBU16_NEXT(*p, offset, end - *p, c); 887 *p += offset; 888 return c; 889 } 890}; 891 892bool MatchPattern(const base::StringPiece& eval, 893 const base::StringPiece& pattern) { 894 return MatchPatternT(eval.data(), eval.data() + eval.size(), 895 pattern.data(), pattern.data() + pattern.size(), 896 0, NextCharUTF8()); 897} 898 899bool MatchPattern(const string16& eval, const string16& pattern) { 900 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(), 901 pattern.c_str(), pattern.c_str() + pattern.size(), 902 0, NextCharUTF16()); 903} 904 905// The following code is compatible with the OpenBSD lcpy interface. See: 906// http://www.gratisoft.us/todd/papers/strlcpy.html 907// ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c 908 909namespace { 910 911template <typename CHAR> 912size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { 913 for (size_t i = 0; i < dst_size; ++i) { 914 if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. 915 return i; 916 } 917 918 // We were left off at dst_size. We over copied 1 byte. Null terminate. 919 if (dst_size != 0) 920 dst[dst_size - 1] = 0; 921 922 // Count the rest of the |src|, and return it's length in characters. 923 while (src[dst_size]) ++dst_size; 924 return dst_size; 925} 926 927} // namespace 928 929size_t base::strlcpy(char* dst, const char* src, size_t dst_size) { 930 return lcpyT<char>(dst, src, dst_size); 931} 932size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) { 933 return lcpyT<wchar_t>(dst, src, dst_size); 934} 935