1/* 2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org) 3 * (C) 1999 Antti Koivisto (koivisto@kde.org) 4 * (C) 2001 Dirk Mueller ( mueller@kde.org ) 5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved. 6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25#include "config.h" 26#include "wtf/text/StringImpl.h" 27 28#include "wtf/DynamicAnnotations.h" 29#include "wtf/LeakAnnotations.h" 30#include "wtf/MainThread.h" 31#include "wtf/OwnPtr.h" 32#include "wtf/PartitionAlloc.h" 33#include "wtf/PassOwnPtr.h" 34#include "wtf/StdLibExtras.h" 35#include "wtf/WTF.h" 36#include "wtf/text/AtomicString.h" 37#include "wtf/text/StringBuffer.h" 38#include "wtf/text/StringHash.h" 39#include "wtf/unicode/CharacterNames.h" 40#include <unicode/translit.h> 41#include <unicode/unistr.h> 42 43#ifdef STRING_STATS 44#include "wtf/DataLog.h" 45#include "wtf/HashMap.h" 46#include "wtf/HashSet.h" 47#include "wtf/ProcessID.h" 48#include "wtf/RefCounted.h" 49#include "wtf/ThreadingPrimitives.h" 50#include <unistd.h> 51#endif 52 53using namespace std; 54 55namespace WTF { 56 57using namespace Unicode; 58 59COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small); 60 61#ifdef STRING_STATS 62 63static Mutex& statsMutex() 64{ 65 DEFINE_STATIC_LOCAL(Mutex, mutex, ()); 66 return mutex; 67} 68 69static HashSet<void*>& liveStrings() 70{ 71 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings. 72 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ()); 73 return strings; 74} 75 76void addStringForStats(StringImpl* string) 77{ 78 MutexLocker locker(statsMutex()); 79 liveStrings().add(string); 80} 81 82void removeStringForStats(StringImpl* string) 83{ 84 MutexLocker locker(statsMutex()); 85 liveStrings().remove(string); 86} 87 88static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet) 89{ 90 const unsigned kMaxSnippetLength = 64; 91 snippet.clear(); 92 93 size_t expectedLength = std::min(string->length(), kMaxSnippetLength); 94 if (expectedLength == kMaxSnippetLength) 95 expectedLength += 3; // For the "...". 96 ++expectedLength; // For the terminating '\0'. 97 snippet.reserveCapacity(expectedLength); 98 99 size_t i; 100 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) { 101 UChar c = (*string)[i]; 102 if (isASCIIPrintable(c)) 103 snippet.append(c); 104 else 105 snippet.append('?'); 106 } 107 if (i < string->length()) { 108 snippet.append('.'); 109 snippet.append('.'); 110 snippet.append('.'); 111 } 112 snippet.append('\0'); 113} 114 115static bool isUnnecessarilyWide(const StringImpl* string) 116{ 117 if (string->is8Bit()) 118 return false; 119 UChar c = 0; 120 for (unsigned i = 0; i < string->length(); ++i) 121 c |= (*string)[i] >> 8; 122 return !c; 123} 124 125class PerStringStats : public RefCounted<PerStringStats> { 126public: 127 static PassRefPtr<PerStringStats> create() 128 { 129 return adoptRef(new PerStringStats); 130 } 131 132 void add(const StringImpl* string) 133 { 134 ++m_numberOfCopies; 135 if (!m_length) { 136 m_length = string->length(); 137 fillWithSnippet(string, m_snippet); 138 } 139 if (string->isAtomic()) 140 ++m_numberOfAtomicCopies; 141 if (isUnnecessarilyWide(string)) 142 m_unnecessarilyWide = true; 143 } 144 145 size_t totalCharacters() const 146 { 147 return m_numberOfCopies * m_length; 148 } 149 150 void print() 151 { 152 const char* status = "ok"; 153 if (m_unnecessarilyWide) 154 status = "16"; 155 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data()); 156 } 157 158 bool m_unnecessarilyWide; 159 unsigned m_numberOfCopies; 160 unsigned m_length; 161 unsigned m_numberOfAtomicCopies; 162 Vector<char> m_snippet; 163 164private: 165 PerStringStats() 166 : m_unnecessarilyWide(false) 167 , m_numberOfCopies(0) 168 , m_length(0) 169 , m_numberOfAtomicCopies(0) 170 { 171 } 172}; 173 174bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b) 175{ 176 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide) 177 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide; 178 if (a->totalCharacters() != b->totalCharacters()) 179 return a->totalCharacters() < b->totalCharacters(); 180 if (a->m_numberOfCopies != b->m_numberOfCopies) 181 return a->m_numberOfCopies < b->m_numberOfCopies; 182 if (a->m_length != b->m_length) 183 return a->m_length < b->m_length; 184 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies; 185} 186 187static void printLiveStringStats(void*) 188{ 189 MutexLocker locker(statsMutex()); 190 HashSet<void*>& strings = liveStrings(); 191 192 HashMap<StringImpl*, RefPtr<PerStringStats> > stats; 193 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) { 194 StringImpl* string = static_cast<StringImpl*>(*iter); 195 HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string); 196 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value; 197 value->add(string); 198 stats.set(string, value.release()); 199 } 200 201 Vector<RefPtr<PerStringStats> > all; 202 for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter) 203 all.append(iter->value); 204 205 std::sort(all.begin(), all.end()); 206 std::reverse(all.begin(), all.end()); 207 for (size_t i = 0; i < 20 && i < all.size(); ++i) 208 all[i]->print(); 209} 210 211StringStats StringImpl::m_stringStats; 212 213unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency; 214 215void StringStats::removeString(StringImpl* string) 216{ 217 unsigned length = string->length(); 218 --m_totalNumberStrings; 219 220 if (string->is8Bit()) { 221 --m_number8BitStrings; 222 m_total8BitData -= length; 223 } else { 224 --m_number16BitStrings; 225 m_total16BitData -= length; 226 } 227 228 if (!--s_stringRemovesTillPrintStats) { 229 s_stringRemovesTillPrintStats = s_printStringStatsFrequency; 230 printStats(); 231 } 232} 233 234void StringStats::printStats() 235{ 236 dataLogF("String stats for process id %d:\n", getCurrentProcessID()); 237 238 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData; 239 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 240 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0; 241 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength); 242 243 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0; 244 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0; 245 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength); 246 247 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0; 248 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2; 249 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength); 250 unsigned long long totalSavedBytes = m_total8BitData; 251 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0; 252 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings); 253 254 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl); 255 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100; 256 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent); 257 258 callOnMainThread(printLiveStringStats, 0); 259} 260#endif 261 262void* StringImpl::operator new(size_t size) 263{ 264 ASSERT(size == sizeof(StringImpl)); 265 return partitionAllocGeneric(Partitions::getBufferPartition(), size); 266} 267 268void StringImpl::operator delete(void* ptr) 269{ 270 partitionFreeGeneric(Partitions::getBufferPartition(), ptr); 271} 272 273inline StringImpl::~StringImpl() 274{ 275 ASSERT(!isStatic()); 276 277 STRING_STATS_REMOVE_STRING(this); 278 279 if (isAtomic()) 280 AtomicString::remove(this); 281} 282 283void StringImpl::destroyIfNotStatic() 284{ 285 if (!isStatic()) 286 delete this; 287} 288 289PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data) 290{ 291 if (!length) { 292 data = 0; 293 return empty(); 294 } 295 296 // Allocate a single buffer large enough to contain the StringImpl 297 // struct as well as the data which it contains. This removes one 298 // heap allocation from this call. 299 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length))); 300 301 data = reinterpret_cast<LChar*>(string + 1); 302 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 303} 304 305PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) 306{ 307 if (!length) { 308 data = 0; 309 return empty(); 310 } 311 312 // Allocate a single buffer large enough to contain the StringImpl 313 // struct as well as the data which it contains. This removes one 314 // heap allocation from this call. 315 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length))); 316 317 data = reinterpret_cast<UChar*>(string + 1); 318 return adoptRef(new (string) StringImpl(length)); 319} 320 321PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length) 322{ 323 ASSERT(originalString->hasOneRef()); 324 325 if (!length) 326 return empty(); 327 328 bool is8Bit = originalString->is8Bit(); 329 // Same as createUninitialized() except here we use realloc. 330 size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length); 331 originalString->~StringImpl(); 332 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size)); 333 if (is8Bit) 334 return adoptRef(new (string) StringImpl(length, Force8BitConstructor)); 335 return adoptRef(new (string) StringImpl(length)); 336} 337 338static StaticStringsTable& staticStrings() 339{ 340 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ()); 341 return staticStrings; 342} 343 344#ifndef NDEBUG 345static bool s_allowCreationOfStaticStrings = true; 346#endif 347 348const StaticStringsTable& StringImpl::allStaticStrings() 349{ 350 return staticStrings(); 351} 352 353void StringImpl::freezeStaticStrings() 354{ 355 ASSERT(isMainThread()); 356 357#ifndef NDEBUG 358 s_allowCreationOfStaticStrings = false; 359#endif 360} 361 362unsigned StringImpl::m_highestStaticStringLength = 0; 363 364StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash) 365{ 366 ASSERT(s_allowCreationOfStaticStrings); 367 ASSERT(string); 368 ASSERT(length); 369 370 StaticStringsTable::const_iterator it = staticStrings().find(hash); 371 if (it != staticStrings().end()) { 372 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar))); 373 return it->value; 374 } 375 376 // Allocate a single buffer large enough to contain the StringImpl 377 // struct as well as the data which it contains. This removes one 378 // heap allocation from this call. 379 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar))); 380 size_t size = sizeof(StringImpl) + length * sizeof(LChar); 381 382 WTF_ANNOTATE_SCOPED_MEMORY_LEAK; 383 StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size)); 384 385 LChar* data = reinterpret_cast<LChar*>(impl + 1); 386 impl = new (impl) StringImpl(length, hash, StaticString); 387 memcpy(data, string, length * sizeof(LChar)); 388#ifndef NDEBUG 389 impl->assertHashIsCorrect(); 390#endif 391 392 ASSERT(isMainThread()); 393 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length); 394 staticStrings().add(hash, impl); 395 WTF_ANNOTATE_BENIGN_RACE(impl, 396 "Benign race on the reference counter of a static string created by StringImpl::createStatic"); 397 398 return impl; 399} 400 401PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) 402{ 403 if (!characters || !length) 404 return empty(); 405 406 UChar* data; 407 RefPtr<StringImpl> string = createUninitialized(length, data); 408 memcpy(data, characters, length * sizeof(UChar)); 409 return string.release(); 410} 411 412PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length) 413{ 414 if (!characters || !length) 415 return empty(); 416 417 LChar* data; 418 RefPtr<StringImpl> string = createUninitialized(length, data); 419 memcpy(data, characters, length * sizeof(LChar)); 420 return string.release(); 421} 422 423PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length) 424{ 425 if (!characters || !length) 426 return empty(); 427 428 LChar* data; 429 RefPtr<StringImpl> string = createUninitialized(length, data); 430 431 for (size_t i = 0; i < length; ++i) { 432 if (characters[i] & 0xff00) 433 return create(characters, length); 434 data[i] = static_cast<LChar>(characters[i]); 435 } 436 437 return string.release(); 438} 439 440PassRefPtr<StringImpl> StringImpl::create(const LChar* string) 441{ 442 if (!string) 443 return empty(); 444 size_t length = strlen(reinterpret_cast<const char*>(string)); 445 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max()); 446 return create(string, length); 447} 448 449bool StringImpl::containsOnlyWhitespace() 450{ 451 // FIXME: The definition of whitespace here includes a number of characters 452 // that are not whitespace from the point of view of RenderText; I wonder if 453 // that's a problem in practice. 454 if (is8Bit()) { 455 for (unsigned i = 0; i < m_length; ++i) { 456 UChar c = characters8()[i]; 457 if (!isASCIISpace(c)) 458 return false; 459 } 460 461 return true; 462 } 463 464 for (unsigned i = 0; i < m_length; ++i) { 465 UChar c = characters16()[i]; 466 if (!isASCIISpace(c)) 467 return false; 468 } 469 return true; 470} 471 472PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) 473{ 474 if (start >= m_length) 475 return empty(); 476 unsigned maxLength = m_length - start; 477 if (length >= maxLength) { 478 if (!start) 479 return this; 480 length = maxLength; 481 } 482 if (is8Bit()) 483 return create(characters8() + start, length); 484 485 return create(characters16() + start, length); 486} 487 488UChar32 StringImpl::characterStartingAt(unsigned i) 489{ 490 if (is8Bit()) 491 return characters8()[i]; 492 if (U16_IS_SINGLE(characters16()[i])) 493 return characters16()[i]; 494 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1])) 495 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]); 496 return 0; 497} 498 499PassRefPtr<StringImpl> StringImpl::lower() 500{ 501 // Note: This is a hot function in the Dromaeo benchmark, specifically the 502 // no-op code path up through the first 'return' statement. 503 504 // First scan the string for uppercase and non-ASCII characters: 505 bool noUpper = true; 506 UChar ored = 0; 507 if (is8Bit()) { 508 const LChar* end = characters8() + m_length; 509 for (const LChar* chp = characters8(); chp != end; ++chp) { 510 if (UNLIKELY(isASCIIUpper(*chp))) 511 noUpper = false; 512 ored |= *chp; 513 } 514 // Nothing to do if the string is all ASCII with no uppercase. 515 if (noUpper && !(ored & ~0x7F)) 516 return this; 517 518 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 519 int32_t length = m_length; 520 521 LChar* data8; 522 RefPtr<StringImpl> newImpl = createUninitialized(length, data8); 523 524 if (!(ored & ~0x7F)) { 525 for (int32_t i = 0; i < length; ++i) 526 data8[i] = toASCIILower(characters8()[i]); 527 528 return newImpl.release(); 529 } 530 531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 532 for (int32_t i = 0; i < length; ++i) 533 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 534 535 return newImpl.release(); 536 } 537 538 const UChar* end = characters16() + m_length; 539 for (const UChar* chp = characters16(); chp != end; ++chp) { 540 if (UNLIKELY(isASCIIUpper(*chp))) 541 noUpper = false; 542 ored |= *chp; 543 } 544 // Nothing to do if the string is all ASCII with no uppercase. 545 if (noUpper && !(ored & ~0x7F)) 546 return this; 547 548 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 549 int32_t length = m_length; 550 551 if (!(ored & ~0x7F)) { 552 UChar* data16; 553 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 554 555 for (int32_t i = 0; i < length; ++i) { 556 UChar c = characters16()[i]; 557 data16[i] = toASCIILower(c); 558 } 559 return newImpl.release(); 560 } 561 562 // Do a slower implementation for cases that include non-ASCII characters. 563 UChar* data16; 564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 565 566 bool error; 567 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error); 568 if (!error && realLength == length) 569 return newImpl.release(); 570 571 newImpl = createUninitialized(realLength, data16); 572 Unicode::toLower(data16, realLength, characters16(), m_length, &error); 573 if (error) 574 return this; 575 return newImpl.release(); 576} 577 578PassRefPtr<StringImpl> StringImpl::upper() 579{ 580 // This function could be optimized for no-op cases the way lower() is, 581 // but in empirical testing, few actual calls to upper() are no-ops, so 582 // it wouldn't be worth the extra time for pre-scanning. 583 584 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 585 int32_t length = m_length; 586 587 if (is8Bit()) { 588 LChar* data8; 589 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8); 590 591 // Do a faster loop for the case where all the characters are ASCII. 592 LChar ored = 0; 593 for (int i = 0; i < length; ++i) { 594 LChar c = characters8()[i]; 595 ored |= c; 596 data8[i] = toASCIIUpper(c); 597 } 598 if (!(ored & ~0x7F)) 599 return newImpl.release(); 600 601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 602 int numberSharpSCharacters = 0; 603 604 // There are two special cases. 605 // 1. latin-1 characters when converted to upper case are 16 bit characters. 606 // 2. Lower case sharp-S converts to "SS" (two characters) 607 for (int32_t i = 0; i < length; ++i) { 608 LChar c = characters8()[i]; 609 if (UNLIKELY(c == smallLetterSharpS)) 610 ++numberSharpSCharacters; 611 UChar upper = Unicode::toUpper(c); 612 if (UNLIKELY(upper > 0xff)) { 613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path. 614 goto upconvert; 615 } 616 data8[i] = static_cast<LChar>(upper); 617 } 618 619 if (!numberSharpSCharacters) 620 return newImpl.release(); 621 622 // We have numberSSCharacters sharp-s characters, but none of the other special characters. 623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8); 624 625 LChar* dest = data8; 626 627 for (int32_t i = 0; i < length; ++i) { 628 LChar c = characters8()[i]; 629 if (c == smallLetterSharpS) { 630 *dest++ = 'S'; 631 *dest++ = 'S'; 632 } else 633 *dest++ = static_cast<LChar>(Unicode::toUpper(c)); 634 } 635 636 return newImpl.release(); 637 } 638 639upconvert: 640 RefPtr<StringImpl> upconverted = upconvertedString(); 641 const UChar* source16 = upconverted->characters16(); 642 643 UChar* data16; 644 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16); 645 646 // Do a faster loop for the case where all the characters are ASCII. 647 UChar ored = 0; 648 for (int i = 0; i < length; ++i) { 649 UChar c = source16[i]; 650 ored |= c; 651 data16[i] = toASCIIUpper(c); 652 } 653 if (!(ored & ~0x7F)) 654 return newImpl.release(); 655 656 // Do a slower implementation for cases that include non-ASCII characters. 657 bool error; 658 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error); 659 if (!error && realLength == length) 660 return newImpl; 661 newImpl = createUninitialized(realLength, data16); 662 Unicode::toUpper(data16, realLength, source16, m_length, &error); 663 if (error) 664 return this; 665 return newImpl.release(); 666} 667 668static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang) 669{ 670 if (equalIgnoringCase(localeId, lang)) 671 return true; 672 static char localeIdPrefix[4]; 673 static const char delimeter[4] = "-_@"; 674 675 size_t langLength = strlen(lang); 676 RELEASE_ASSERT(langLength >= 2 && langLength <= 3); 677 strncpy(localeIdPrefix, lang, langLength); 678 for (int i = 0; i < 3; ++i) { 679 localeIdPrefix[langLength] = delimeter[i]; 680 // case-insensitive comparison 681 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false)) 682 return true; 683 } 684 return false; 685} 686 687typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*); 688 689static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString) 690{ 691 UChar* data16; 692 int32_t targetLength = length; 693 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16); 694 do { 695 UErrorCode status = U_ZERO_ERROR; 696 targetLength = converter(data16, targetLength, source16, length, locale, &status); 697 if (U_SUCCESS(status)) { 698 output->truncateAssumingIsolated(targetLength); 699 return output.release(); 700 } 701 if (status != U_BUFFER_OVERFLOW_ERROR) 702 return originalString; 703 // Expand the buffer. 704 output = StringImpl::createUninitialized(targetLength, data16); 705 } while (true); 706} 707 708PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier) 709{ 710 // Use the more-optimized code path most of the time. 711 // Only Turkic (tr and az) languages and Lithuanian requires 712 // locale-specific lowercasing rules. Even though CLDR has el-Lower, 713 // it's identical to the locale-agnostic lowercasing. Context-dependent 714 // handling of Greek capital sigma is built into the common lowercasing 715 // function in ICU. 716 const char* localeForConversion = 0; 717 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az")) 718 localeForConversion = "tr"; 719 else if (localeIdMatchesLang(localeIdentifier, "lt")) 720 localeForConversion = "lt"; 721 else 722 return lower(); 723 724 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 725 CRASH(); 726 int length = m_length; 727 728 RefPtr<StringImpl> upconverted = upconvertedString(); 729 const UChar* source16 = upconverted->characters16(); 730 return caseConvert(source16, length, u_strToLower, localeForConversion, this); 731} 732 733PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier) 734{ 735 // Use the more-optimized code path most of the time. 736 // Only Turkic (tr and az) languages and Greek require locale-specific 737 // lowercasing rules. 738 icu::UnicodeString transliteratorId; 739 const char* localeForConversion = 0; 740 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az")) 741 localeForConversion = "tr"; 742 else if (localeIdMatchesLang(localeIdentifier, "el")) 743 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper"); 744 else if (localeIdMatchesLang(localeIdentifier, "lt")) 745 localeForConversion = "lt"; 746 else 747 return upper(); 748 749 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) 750 CRASH(); 751 int length = m_length; 752 753 RefPtr<StringImpl> upconverted = upconvertedString(); 754 const UChar* source16 = upconverted->characters16(); 755 756 if (localeForConversion) 757 return caseConvert(source16, length, u_strToUpper, localeForConversion, this); 758 759 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek. 760 UErrorCode status = U_ZERO_ERROR; 761 OwnPtr<icu::Transliterator> translit = 762 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status)); 763 if (U_FAILURE(status)) 764 return upper(); 765 766 // target will be copy-on-write. 767 icu::UnicodeString target(false, source16, length); 768 translit->transliterate(target); 769 770 return create(target.getBuffer(), target.length()); 771} 772 773PassRefPtr<StringImpl> StringImpl::fill(UChar character) 774{ 775 if (!(character & ~0x7F)) { 776 LChar* data; 777 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 778 for (unsigned i = 0; i < m_length; ++i) 779 data[i] = character; 780 return newImpl.release(); 781 } 782 UChar* data; 783 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 784 for (unsigned i = 0; i < m_length; ++i) 785 data[i] = character; 786 return newImpl.release(); 787} 788 789PassRefPtr<StringImpl> StringImpl::foldCase() 790{ 791 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max())); 792 int32_t length = m_length; 793 794 if (is8Bit()) { 795 // Do a faster loop for the case where all the characters are ASCII. 796 LChar* data; 797 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data); 798 LChar ored = 0; 799 800 for (int32_t i = 0; i < length; ++i) { 801 LChar c = characters8()[i]; 802 data[i] = toASCIILower(c); 803 ored |= c; 804 } 805 806 if (!(ored & ~0x7F)) 807 return newImpl.release(); 808 809 // Do a slower implementation for cases that include non-ASCII Latin-1 characters. 810 for (int32_t i = 0; i < length; ++i) 811 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i])); 812 813 return newImpl.release(); 814 } 815 816 // Do a faster loop for the case where all the characters are ASCII. 817 UChar* data; 818 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 819 UChar ored = 0; 820 for (int32_t i = 0; i < length; ++i) { 821 UChar c = characters16()[i]; 822 ored |= c; 823 data[i] = toASCIILower(c); 824 } 825 if (!(ored & ~0x7F)) 826 return newImpl.release(); 827 828 // Do a slower implementation for cases that include non-ASCII characters. 829 bool error; 830 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error); 831 if (!error && realLength == length) 832 return newImpl.release(); 833 newImpl = createUninitialized(realLength, data); 834 Unicode::foldCase(data, realLength, characters16(), m_length, &error); 835 if (error) 836 return this; 837 return newImpl.release(); 838} 839 840template <class UCharPredicate> 841inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate) 842{ 843 if (!m_length) 844 return empty(); 845 846 unsigned start = 0; 847 unsigned end = m_length - 1; 848 849 // skip white space from start 850 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start])) 851 ++start; 852 853 // only white space 854 if (start > end) 855 return empty(); 856 857 // skip white space from end 858 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end])) 859 --end; 860 861 if (!start && end == m_length - 1) 862 return this; 863 if (is8Bit()) 864 return create(characters8() + start, end + 1 - start); 865 return create(characters16() + start, end + 1 - start); 866} 867 868class UCharPredicate { 869public: 870 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { } 871 872 inline bool operator()(UChar ch) const 873 { 874 return m_function(ch); 875 } 876 877private: 878 const CharacterMatchFunctionPtr m_function; 879}; 880 881class SpaceOrNewlinePredicate { 882public: 883 inline bool operator()(UChar ch) const 884 { 885 return isSpaceOrNewline(ch); 886 } 887}; 888 889PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() 890{ 891 return stripMatchedCharacters(SpaceOrNewlinePredicate()); 892} 893 894PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) 895{ 896 return stripMatchedCharacters(UCharPredicate(isWhiteSpace)); 897} 898 899template <typename CharType> 900ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch) 901{ 902 const CharType* from = characters; 903 const CharType* fromend = from + m_length; 904 905 // Assume the common case will not remove any characters 906 while (from != fromend && !findMatch(*from)) 907 ++from; 908 if (from == fromend) 909 return this; 910 911 StringBuffer<CharType> data(m_length); 912 CharType* to = data.characters(); 913 unsigned outc = from - characters; 914 915 if (outc) 916 memcpy(to, characters, outc * sizeof(CharType)); 917 918 while (true) { 919 while (from != fromend && findMatch(*from)) 920 ++from; 921 while (from != fromend && !findMatch(*from)) 922 to[outc++] = *from++; 923 if (from == fromend) 924 break; 925 } 926 927 data.shrink(outc); 928 929 return data.release(); 930} 931 932PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) 933{ 934 if (is8Bit()) 935 return removeCharacters(characters8(), findMatch); 936 return removeCharacters(characters16(), findMatch); 937} 938 939template <typename CharType, class UCharPredicate> 940inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior) 941{ 942 StringBuffer<CharType> data(m_length); 943 944 const CharType* from = getCharacters<CharType>(); 945 const CharType* fromend = from + m_length; 946 int outc = 0; 947 bool changedToSpace = false; 948 949 CharType* to = data.characters(); 950 951 if (stripBehavior == StripExtraWhiteSpace) { 952 while (true) { 953 while (from != fromend && predicate(*from)) { 954 if (*from != ' ') 955 changedToSpace = true; 956 ++from; 957 } 958 while (from != fromend && !predicate(*from)) 959 to[outc++] = *from++; 960 if (from != fromend) 961 to[outc++] = ' '; 962 else 963 break; 964 } 965 966 if (outc > 0 && to[outc - 1] == ' ') 967 --outc; 968 } else { 969 for (; from != fromend; ++from) { 970 if (predicate(*from)) { 971 if (*from != ' ') 972 changedToSpace = true; 973 to[outc++] = ' '; 974 } else { 975 to[outc++] = *from; 976 } 977 } 978 } 979 980 if (static_cast<unsigned>(outc) == m_length && !changedToSpace) 981 return this; 982 983 data.shrink(outc); 984 985 return data.release(); 986} 987 988PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior) 989{ 990 if (is8Bit()) 991 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior); 992 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior); 993} 994 995PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior) 996{ 997 if (is8Bit()) 998 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior); 999 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior); 1000} 1001 1002int StringImpl::toIntStrict(bool* ok, int base) 1003{ 1004 if (is8Bit()) 1005 return charactersToIntStrict(characters8(), m_length, ok, base); 1006 return charactersToIntStrict(characters16(), m_length, ok, base); 1007} 1008 1009unsigned StringImpl::toUIntStrict(bool* ok, int base) 1010{ 1011 if (is8Bit()) 1012 return charactersToUIntStrict(characters8(), m_length, ok, base); 1013 return charactersToUIntStrict(characters16(), m_length, ok, base); 1014} 1015 1016int64_t StringImpl::toInt64Strict(bool* ok, int base) 1017{ 1018 if (is8Bit()) 1019 return charactersToInt64Strict(characters8(), m_length, ok, base); 1020 return charactersToInt64Strict(characters16(), m_length, ok, base); 1021} 1022 1023uint64_t StringImpl::toUInt64Strict(bool* ok, int base) 1024{ 1025 if (is8Bit()) 1026 return charactersToUInt64Strict(characters8(), m_length, ok, base); 1027 return charactersToUInt64Strict(characters16(), m_length, ok, base); 1028} 1029 1030intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) 1031{ 1032 if (is8Bit()) 1033 return charactersToIntPtrStrict(characters8(), m_length, ok, base); 1034 return charactersToIntPtrStrict(characters16(), m_length, ok, base); 1035} 1036 1037int StringImpl::toInt(bool* ok) 1038{ 1039 if (is8Bit()) 1040 return charactersToInt(characters8(), m_length, ok); 1041 return charactersToInt(characters16(), m_length, ok); 1042} 1043 1044unsigned StringImpl::toUInt(bool* ok) 1045{ 1046 if (is8Bit()) 1047 return charactersToUInt(characters8(), m_length, ok); 1048 return charactersToUInt(characters16(), m_length, ok); 1049} 1050 1051int64_t StringImpl::toInt64(bool* ok) 1052{ 1053 if (is8Bit()) 1054 return charactersToInt64(characters8(), m_length, ok); 1055 return charactersToInt64(characters16(), m_length, ok); 1056} 1057 1058uint64_t StringImpl::toUInt64(bool* ok) 1059{ 1060 if (is8Bit()) 1061 return charactersToUInt64(characters8(), m_length, ok); 1062 return charactersToUInt64(characters16(), m_length, ok); 1063} 1064 1065intptr_t StringImpl::toIntPtr(bool* ok) 1066{ 1067 if (is8Bit()) 1068 return charactersToIntPtr(characters8(), m_length, ok); 1069 return charactersToIntPtr(characters16(), m_length, ok); 1070} 1071 1072double StringImpl::toDouble(bool* ok) 1073{ 1074 if (is8Bit()) 1075 return charactersToDouble(characters8(), m_length, ok); 1076 return charactersToDouble(characters16(), m_length, ok); 1077} 1078 1079float StringImpl::toFloat(bool* ok) 1080{ 1081 if (is8Bit()) 1082 return charactersToFloat(characters8(), m_length, ok); 1083 return charactersToFloat(characters16(), m_length, ok); 1084} 1085 1086bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length) 1087{ 1088 while (length--) { 1089 LChar bc = *b++; 1090 if (foldCase(*a++) != foldCase(bc)) 1091 return false; 1092 } 1093 return true; 1094} 1095 1096bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length) 1097{ 1098 while (length--) { 1099 LChar bc = *b++; 1100 if (foldCase(*a++) != foldCase(bc)) 1101 return false; 1102 } 1103 return true; 1104} 1105 1106size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start) 1107{ 1108 if (is8Bit()) 1109 return WTF::find(characters8(), m_length, matchFunction, start); 1110 return WTF::find(characters16(), m_length, matchFunction, start); 1111} 1112 1113size_t StringImpl::find(const LChar* matchString, unsigned index) 1114{ 1115 // Check for null or empty string to match against 1116 if (!matchString) 1117 return kNotFound; 1118 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); 1119 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1120 unsigned matchLength = matchStringLength; 1121 if (!matchLength) 1122 return min(index, length()); 1123 1124 // Optimization 1: fast case for strings of length 1. 1125 if (matchLength == 1) 1126 return WTF::find(characters16(), length(), *matchString, index); 1127 1128 // Check index & matchLength are in range. 1129 if (index > length()) 1130 return kNotFound; 1131 unsigned searchLength = length() - index; 1132 if (matchLength > searchLength) 1133 return kNotFound; 1134 // delta is the number of additional times to test; delta == 0 means test only once. 1135 unsigned delta = searchLength - matchLength; 1136 1137 const UChar* searchCharacters = characters16() + index; 1138 1139 // Optimization 2: keep a running hash of the strings, 1140 // only call equal if the hashes match. 1141 unsigned searchHash = 0; 1142 unsigned matchHash = 0; 1143 for (unsigned i = 0; i < matchLength; ++i) { 1144 searchHash += searchCharacters[i]; 1145 matchHash += matchString[i]; 1146 } 1147 1148 unsigned i = 0; 1149 // keep looping until we match 1150 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) { 1151 if (i == delta) 1152 return kNotFound; 1153 searchHash += searchCharacters[i + matchLength]; 1154 searchHash -= searchCharacters[i]; 1155 ++i; 1156 } 1157 return index + i; 1158} 1159 1160template<typename CharType> 1161ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength) 1162{ 1163 // delta is the number of additional times to test; delta == 0 means test only once. 1164 unsigned delta = searchLength - matchLength; 1165 1166 unsigned i = 0; 1167 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) { 1168 if (i == delta) 1169 return kNotFound; 1170 ++i; 1171 } 1172 return index + i; 1173} 1174 1175size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index) 1176{ 1177 // Check for null or empty string to match against 1178 if (!matchString) 1179 return kNotFound; 1180 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString)); 1181 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max()); 1182 unsigned matchLength = matchStringLength; 1183 if (!matchLength) 1184 return min(index, length()); 1185 1186 // Check index & matchLength are in range. 1187 if (index > length()) 1188 return kNotFound; 1189 unsigned searchLength = length() - index; 1190 if (matchLength > searchLength) 1191 return kNotFound; 1192 1193 if (is8Bit()) 1194 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength); 1195 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength); 1196} 1197 1198template <typename SearchCharacterType, typename MatchCharacterType> 1199ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) 1200{ 1201 // Optimization: keep a running hash of the strings, 1202 // only call equal() if the hashes match. 1203 1204 // delta is the number of additional times to test; delta == 0 means test only once. 1205 unsigned delta = searchLength - matchLength; 1206 1207 unsigned searchHash = 0; 1208 unsigned matchHash = 0; 1209 1210 for (unsigned i = 0; i < matchLength; ++i) { 1211 searchHash += searchCharacters[i]; 1212 matchHash += matchCharacters[i]; 1213 } 1214 1215 unsigned i = 0; 1216 // keep looping until we match 1217 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) { 1218 if (i == delta) 1219 return kNotFound; 1220 searchHash += searchCharacters[i + matchLength]; 1221 searchHash -= searchCharacters[i]; 1222 ++i; 1223 } 1224 return index + i; 1225} 1226 1227size_t StringImpl::find(StringImpl* matchString) 1228{ 1229 // Check for null string to match against 1230 if (UNLIKELY(!matchString)) 1231 return kNotFound; 1232 unsigned matchLength = matchString->length(); 1233 1234 // Optimization 1: fast case for strings of length 1. 1235 if (matchLength == 1) { 1236 if (is8Bit()) { 1237 if (matchString->is8Bit()) 1238 return WTF::find(characters8(), length(), matchString->characters8()[0]); 1239 return WTF::find(characters8(), length(), matchString->characters16()[0]); 1240 } 1241 if (matchString->is8Bit()) 1242 return WTF::find(characters16(), length(), matchString->characters8()[0]); 1243 return WTF::find(characters16(), length(), matchString->characters16()[0]); 1244 } 1245 1246 // Check matchLength is in range. 1247 if (matchLength > length()) 1248 return kNotFound; 1249 1250 // Check for empty string to match against 1251 if (UNLIKELY(!matchLength)) 1252 return 0; 1253 1254 if (is8Bit()) { 1255 if (matchString->is8Bit()) 1256 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength); 1257 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength); 1258 } 1259 1260 if (matchString->is8Bit()) 1261 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength); 1262 1263 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength); 1264} 1265 1266size_t StringImpl::find(StringImpl* matchString, unsigned index) 1267{ 1268 // Check for null or empty string to match against 1269 if (UNLIKELY(!matchString)) 1270 return kNotFound; 1271 1272 unsigned matchLength = matchString->length(); 1273 1274 // Optimization 1: fast case for strings of length 1. 1275 if (matchLength == 1) { 1276 if (is8Bit()) 1277 return WTF::find(characters8(), length(), (*matchString)[0], index); 1278 return WTF::find(characters16(), length(), (*matchString)[0], index); 1279 } 1280 1281 if (UNLIKELY(!matchLength)) 1282 return min(index, length()); 1283 1284 // Check index & matchLength are in range. 1285 if (index > length()) 1286 return kNotFound; 1287 unsigned searchLength = length() - index; 1288 if (matchLength > searchLength) 1289 return kNotFound; 1290 1291 if (is8Bit()) { 1292 if (matchString->is8Bit()) 1293 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength); 1294 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength); 1295 } 1296 1297 if (matchString->is8Bit()) 1298 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength); 1299 1300 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength); 1301} 1302 1303template <typename SearchCharacterType, typename MatchCharacterType> 1304ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength) 1305{ 1306 // delta is the number of additional times to test; delta == 0 means test only once. 1307 unsigned delta = searchLength - matchLength; 1308 1309 unsigned i = 0; 1310 // keep looping until we match 1311 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) { 1312 if (i == delta) 1313 return kNotFound; 1314 ++i; 1315 } 1316 return index + i; 1317} 1318 1319size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index) 1320{ 1321 // Check for null or empty string to match against 1322 if (!matchString) 1323 return kNotFound; 1324 unsigned matchLength = matchString->length(); 1325 if (!matchLength) 1326 return min(index, length()); 1327 1328 // Check index & matchLength are in range. 1329 if (index > length()) 1330 return kNotFound; 1331 unsigned searchLength = length() - index; 1332 if (matchLength > searchLength) 1333 return kNotFound; 1334 1335 if (is8Bit()) { 1336 if (matchString->is8Bit()) 1337 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength); 1338 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength); 1339 } 1340 1341 if (matchString->is8Bit()) 1342 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength); 1343 1344 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength); 1345} 1346 1347size_t StringImpl::findNextLineStart(unsigned index) 1348{ 1349 if (is8Bit()) 1350 return WTF::findNextLineStart(characters8(), m_length, index); 1351 return WTF::findNextLineStart(characters16(), m_length, index); 1352} 1353 1354size_t StringImpl::count(LChar c) const 1355{ 1356 int count = 0; 1357 if (is8Bit()) { 1358 for (size_t i = 0; i < m_length; ++i) 1359 count += characters8()[i] == c; 1360 } else { 1361 for (size_t i = 0; i < m_length; ++i) 1362 count += characters16()[i] == c; 1363 } 1364 return count; 1365} 1366 1367size_t StringImpl::reverseFind(UChar c, unsigned index) 1368{ 1369 if (is8Bit()) 1370 return WTF::reverseFind(characters8(), m_length, c, index); 1371 return WTF::reverseFind(characters16(), m_length, c, index); 1372} 1373 1374template <typename SearchCharacterType, typename MatchCharacterType> 1375ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength) 1376{ 1377 // Optimization: keep a running hash of the strings, 1378 // only call equal if the hashes match. 1379 1380 // delta is the number of additional times to test; delta == 0 means test only once. 1381 unsigned delta = min(index, length - matchLength); 1382 1383 unsigned searchHash = 0; 1384 unsigned matchHash = 0; 1385 for (unsigned i = 0; i < matchLength; ++i) { 1386 searchHash += searchCharacters[delta + i]; 1387 matchHash += matchCharacters[i]; 1388 } 1389 1390 // keep looping until we match 1391 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) { 1392 if (!delta) 1393 return kNotFound; 1394 --delta; 1395 searchHash -= searchCharacters[delta + matchLength]; 1396 searchHash += searchCharacters[delta]; 1397 } 1398 return delta; 1399} 1400 1401size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index) 1402{ 1403 // Check for null or empty string to match against 1404 if (!matchString) 1405 return kNotFound; 1406 unsigned matchLength = matchString->length(); 1407 unsigned ourLength = length(); 1408 if (!matchLength) 1409 return min(index, ourLength); 1410 1411 // Optimization 1: fast case for strings of length 1. 1412 if (matchLength == 1) { 1413 if (is8Bit()) 1414 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index); 1415 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index); 1416 } 1417 1418 // Check index & matchLength are in range. 1419 if (matchLength > ourLength) 1420 return kNotFound; 1421 1422 if (is8Bit()) { 1423 if (matchString->is8Bit()) 1424 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength); 1425 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength); 1426 } 1427 1428 if (matchString->is8Bit()) 1429 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength); 1430 1431 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength); 1432} 1433 1434template <typename SearchCharacterType, typename MatchCharacterType> 1435ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength) 1436{ 1437 // delta is the number of additional times to test; delta == 0 means test only once. 1438 unsigned delta = min(index, length - matchLength); 1439 1440 // keep looping until we match 1441 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) { 1442 if (!delta) 1443 return kNotFound; 1444 --delta; 1445 } 1446 return delta; 1447} 1448 1449size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index) 1450{ 1451 // Check for null or empty string to match against 1452 if (!matchString) 1453 return kNotFound; 1454 unsigned matchLength = matchString->length(); 1455 unsigned ourLength = length(); 1456 if (!matchLength) 1457 return min(index, ourLength); 1458 1459 // Check index & matchLength are in range. 1460 if (matchLength > ourLength) 1461 return kNotFound; 1462 1463 if (is8Bit()) { 1464 if (matchString->is8Bit()) 1465 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength); 1466 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength); 1467 } 1468 1469 if (matchString->is8Bit()) 1470 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength); 1471 1472 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength); 1473} 1474 1475ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive) 1476{ 1477 ASSERT(stringImpl); 1478 ASSERT(matchLength <= stringImpl->length()); 1479 ASSERT(startOffset + matchLength <= stringImpl->length()); 1480 1481 if (caseSensitive) { 1482 if (stringImpl->is8Bit()) 1483 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1484 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1485 } 1486 if (stringImpl->is8Bit()) 1487 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1488 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength); 1489} 1490 1491bool StringImpl::startsWith(UChar character) const 1492{ 1493 return m_length && (*this)[0] == character; 1494} 1495 1496bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const 1497{ 1498 ASSERT(matchLength); 1499 if (matchLength > length()) 1500 return false; 1501 return equalInner(this, 0, matchString, matchLength, caseSensitive); 1502} 1503 1504bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive) 1505{ 1506 ASSERT(matchString); 1507 if (m_length >= matchString->m_length) { 1508 unsigned start = m_length - matchString->m_length; 1509 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start; 1510 } 1511 return false; 1512} 1513 1514bool StringImpl::endsWith(UChar character) const 1515{ 1516 return m_length && (*this)[m_length - 1] == character; 1517} 1518 1519bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const 1520{ 1521 ASSERT(matchLength); 1522 if (matchLength > length()) 1523 return false; 1524 unsigned startOffset = length() - matchLength; 1525 return equalInner(this, startOffset, matchString, matchLength, caseSensitive); 1526} 1527 1528PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) 1529{ 1530 if (oldC == newC) 1531 return this; 1532 1533 if (find(oldC) == kNotFound) 1534 return this; 1535 1536 unsigned i; 1537 if (is8Bit()) { 1538 if (newC <= 0xff) { 1539 LChar* data; 1540 LChar oldChar = static_cast<LChar>(oldC); 1541 LChar newChar = static_cast<LChar>(newC); 1542 1543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1544 1545 for (i = 0; i != m_length; ++i) { 1546 LChar ch = characters8()[i]; 1547 if (ch == oldChar) 1548 ch = newChar; 1549 data[i] = ch; 1550 } 1551 return newImpl.release(); 1552 } 1553 1554 // There is the possibility we need to up convert from 8 to 16 bit, 1555 // create a 16 bit string for the result. 1556 UChar* data; 1557 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1558 1559 for (i = 0; i != m_length; ++i) { 1560 UChar ch = characters8()[i]; 1561 if (ch == oldC) 1562 ch = newC; 1563 data[i] = ch; 1564 } 1565 1566 return newImpl.release(); 1567 } 1568 1569 UChar* data; 1570 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); 1571 1572 for (i = 0; i != m_length; ++i) { 1573 UChar ch = characters16()[i]; 1574 if (ch == oldC) 1575 ch = newC; 1576 data[i] = ch; 1577 } 1578 return newImpl.release(); 1579} 1580 1581PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) 1582{ 1583 position = min(position, length()); 1584 lengthToReplace = min(lengthToReplace, length() - position); 1585 unsigned lengthToInsert = str ? str->length() : 0; 1586 if (!lengthToReplace && !lengthToInsert) 1587 return this; 1588 1589 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert)); 1590 1591 if (is8Bit() && (!str || str->is8Bit())) { 1592 LChar* data; 1593 RefPtr<StringImpl> newImpl = 1594 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1595 memcpy(data, characters8(), position * sizeof(LChar)); 1596 if (str) 1597 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar)); 1598 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace, 1599 (length() - position - lengthToReplace) * sizeof(LChar)); 1600 return newImpl.release(); 1601 } 1602 UChar* data; 1603 RefPtr<StringImpl> newImpl = 1604 createUninitialized(length() - lengthToReplace + lengthToInsert, data); 1605 if (is8Bit()) 1606 for (unsigned i = 0; i < position; ++i) 1607 data[i] = characters8()[i]; 1608 else 1609 memcpy(data, characters16(), position * sizeof(UChar)); 1610 if (str) { 1611 if (str->is8Bit()) 1612 for (unsigned i = 0; i < lengthToInsert; ++i) 1613 data[i + position] = str->characters8()[i]; 1614 else 1615 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar)); 1616 } 1617 if (is8Bit()) { 1618 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i) 1619 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace]; 1620 } else { 1621 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace, 1622 (length() - position - lengthToReplace) * sizeof(UChar)); 1623 } 1624 return newImpl.release(); 1625} 1626 1627PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) 1628{ 1629 if (!replacement) 1630 return this; 1631 1632 if (replacement->is8Bit()) 1633 return replace(pattern, replacement->characters8(), replacement->length()); 1634 1635 return replace(pattern, replacement->characters16(), replacement->length()); 1636} 1637 1638PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength) 1639{ 1640 ASSERT(replacement); 1641 1642 size_t srcSegmentStart = 0; 1643 unsigned matchCount = 0; 1644 1645 // Count the matches. 1646 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1647 ++matchCount; 1648 ++srcSegmentStart; 1649 } 1650 1651 // If we have 0 matches then we don't have to do any more work. 1652 if (!matchCount) 1653 return this; 1654 1655 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1656 1657 unsigned replaceSize = matchCount * repStrLength; 1658 unsigned newSize = m_length - matchCount; 1659 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1660 1661 newSize += replaceSize; 1662 1663 // Construct the new data. 1664 size_t srcSegmentEnd; 1665 unsigned srcSegmentLength; 1666 srcSegmentStart = 0; 1667 unsigned dstOffset = 0; 1668 1669 if (is8Bit()) { 1670 LChar* data; 1671 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1672 1673 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1674 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1675 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1676 dstOffset += srcSegmentLength; 1677 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar)); 1678 dstOffset += repStrLength; 1679 srcSegmentStart = srcSegmentEnd + 1; 1680 } 1681 1682 srcSegmentLength = m_length - srcSegmentStart; 1683 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1684 1685 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1686 1687 return newImpl.release(); 1688 } 1689 1690 UChar* data; 1691 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1692 1693 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1694 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1695 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1696 1697 dstOffset += srcSegmentLength; 1698 for (unsigned i = 0; i < repStrLength; ++i) 1699 data[i + dstOffset] = replacement[i]; 1700 1701 dstOffset += repStrLength; 1702 srcSegmentStart = srcSegmentEnd + 1; 1703 } 1704 1705 srcSegmentLength = m_length - srcSegmentStart; 1706 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1707 1708 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1709 1710 return newImpl.release(); 1711} 1712 1713PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength) 1714{ 1715 ASSERT(replacement); 1716 1717 size_t srcSegmentStart = 0; 1718 unsigned matchCount = 0; 1719 1720 // Count the matches. 1721 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1722 ++matchCount; 1723 ++srcSegmentStart; 1724 } 1725 1726 // If we have 0 matches then we don't have to do any more work. 1727 if (!matchCount) 1728 return this; 1729 1730 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1731 1732 unsigned replaceSize = matchCount * repStrLength; 1733 unsigned newSize = m_length - matchCount; 1734 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize)); 1735 1736 newSize += replaceSize; 1737 1738 // Construct the new data. 1739 size_t srcSegmentEnd; 1740 unsigned srcSegmentLength; 1741 srcSegmentStart = 0; 1742 unsigned dstOffset = 0; 1743 1744 if (is8Bit()) { 1745 UChar* data; 1746 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1747 1748 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1749 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1750 for (unsigned i = 0; i < srcSegmentLength; ++i) 1751 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1752 1753 dstOffset += srcSegmentLength; 1754 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 1755 1756 dstOffset += repStrLength; 1757 srcSegmentStart = srcSegmentEnd + 1; 1758 } 1759 1760 srcSegmentLength = m_length - srcSegmentStart; 1761 for (unsigned i = 0; i < srcSegmentLength; ++i) 1762 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1763 1764 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1765 1766 return newImpl.release(); 1767 } 1768 1769 UChar* data; 1770 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1771 1772 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1773 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1774 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1775 1776 dstOffset += srcSegmentLength; 1777 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar)); 1778 1779 dstOffset += repStrLength; 1780 srcSegmentStart = srcSegmentEnd + 1; 1781 } 1782 1783 srcSegmentLength = m_length - srcSegmentStart; 1784 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1785 1786 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1787 1788 return newImpl.release(); 1789} 1790 1791PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) 1792{ 1793 if (!pattern || !replacement) 1794 return this; 1795 1796 unsigned patternLength = pattern->length(); 1797 if (!patternLength) 1798 return this; 1799 1800 unsigned repStrLength = replacement->length(); 1801 size_t srcSegmentStart = 0; 1802 unsigned matchCount = 0; 1803 1804 // Count the matches. 1805 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) { 1806 ++matchCount; 1807 srcSegmentStart += patternLength; 1808 } 1809 1810 // If we have 0 matches, we don't have to do any more work 1811 if (!matchCount) 1812 return this; 1813 1814 unsigned newSize = m_length - matchCount * patternLength; 1815 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength); 1816 1817 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength)); 1818 1819 newSize += matchCount * repStrLength; 1820 1821 1822 // Construct the new data 1823 size_t srcSegmentEnd; 1824 unsigned srcSegmentLength; 1825 srcSegmentStart = 0; 1826 unsigned dstOffset = 0; 1827 bool srcIs8Bit = is8Bit(); 1828 bool replacementIs8Bit = replacement->is8Bit(); 1829 1830 // There are 4 cases: 1831 // 1. This and replacement are both 8 bit. 1832 // 2. This and replacement are both 16 bit. 1833 // 3. This is 8 bit and replacement is 16 bit. 1834 // 4. This is 16 bit and replacement is 8 bit. 1835 if (srcIs8Bit && replacementIs8Bit) { 1836 // Case 1 1837 LChar* data; 1838 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1839 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1840 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1841 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1842 dstOffset += srcSegmentLength; 1843 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar)); 1844 dstOffset += repStrLength; 1845 srcSegmentStart = srcSegmentEnd + patternLength; 1846 } 1847 1848 srcSegmentLength = m_length - srcSegmentStart; 1849 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar)); 1850 1851 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1852 1853 return newImpl.release(); 1854 } 1855 1856 UChar* data; 1857 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); 1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) { 1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart; 1860 if (srcIs8Bit) { 1861 // Case 3. 1862 for (unsigned i = 0; i < srcSegmentLength; ++i) 1863 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1864 } else { 1865 // Case 2 & 4. 1866 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1867 } 1868 dstOffset += srcSegmentLength; 1869 if (replacementIs8Bit) { 1870 // Cases 2 & 3. 1871 for (unsigned i = 0; i < repStrLength; ++i) 1872 data[i + dstOffset] = replacement->characters8()[i]; 1873 } else { 1874 // Case 4 1875 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar)); 1876 } 1877 dstOffset += repStrLength; 1878 srcSegmentStart = srcSegmentEnd + patternLength; 1879 } 1880 1881 srcSegmentLength = m_length - srcSegmentStart; 1882 if (srcIs8Bit) { 1883 // Case 3. 1884 for (unsigned i = 0; i < srcSegmentLength; ++i) 1885 data[i + dstOffset] = characters8()[i + srcSegmentStart]; 1886 } else { 1887 // Cases 2 & 4. 1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar)); 1889 } 1890 1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length()); 1892 1893 return newImpl.release(); 1894} 1895 1896PassRefPtr<StringImpl> StringImpl::upconvertedString() 1897{ 1898 if (is8Bit()) 1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl(); 1900 return this; 1901} 1902 1903static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b) 1904{ 1905 unsigned aLength = a->length(); 1906 unsigned bLength = b->length(); 1907 if (aLength != bLength) 1908 return false; 1909 1910 if (a->is8Bit()) { 1911 if (b->is8Bit()) 1912 return equal(a->characters8(), b->characters8(), aLength); 1913 1914 return equal(a->characters8(), b->characters16(), aLength); 1915 } 1916 1917 if (b->is8Bit()) 1918 return equal(a->characters16(), b->characters8(), aLength); 1919 1920 return equal(a->characters16(), b->characters16(), aLength); 1921} 1922 1923bool equal(const StringImpl* a, const StringImpl* b) 1924{ 1925 if (a == b) 1926 return true; 1927 if (!a || !b) 1928 return false; 1929 if (a->isAtomic() && b->isAtomic()) 1930 return false; 1931 1932 return stringImplContentEqual(a, b); 1933} 1934 1935template <typename CharType> 1936inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length) 1937{ 1938 if (!a) 1939 return !b; 1940 if (!b) 1941 return false; 1942 1943 if (a->length() != length) 1944 return false; 1945 if (a->is8Bit()) 1946 return equal(a->characters8(), b, length); 1947 return equal(a->characters16(), b, length); 1948} 1949 1950bool equal(const StringImpl* a, const LChar* b, unsigned length) 1951{ 1952 return equalInternal(a, b, length); 1953} 1954 1955bool equal(const StringImpl* a, const UChar* b, unsigned length) 1956{ 1957 return equalInternal(a, b, length); 1958} 1959 1960bool equal(const StringImpl* a, const LChar* b) 1961{ 1962 if (!a) 1963 return !b; 1964 if (!b) 1965 return !a; 1966 1967 unsigned length = a->length(); 1968 1969 if (a->is8Bit()) { 1970 const LChar* aPtr = a->characters8(); 1971 for (unsigned i = 0; i != length; ++i) { 1972 LChar bc = b[i]; 1973 LChar ac = aPtr[i]; 1974 if (!bc) 1975 return false; 1976 if (ac != bc) 1977 return false; 1978 } 1979 1980 return !b[length]; 1981 } 1982 1983 const UChar* aPtr = a->characters16(); 1984 for (unsigned i = 0; i != length; ++i) { 1985 LChar bc = b[i]; 1986 if (!bc) 1987 return false; 1988 if (aPtr[i] != bc) 1989 return false; 1990 } 1991 1992 return !b[length]; 1993} 1994 1995bool equalNonNull(const StringImpl* a, const StringImpl* b) 1996{ 1997 ASSERT(a && b); 1998 if (a == b) 1999 return true; 2000 2001 return stringImplContentEqual(a, b); 2002} 2003 2004bool equalIgnoringCase(const StringImpl* a, const StringImpl* b) 2005{ 2006 if (a == b) 2007 return true; 2008 if (!a || !b) 2009 return false; 2010 2011 return CaseFoldingHash::equal(a, b); 2012} 2013 2014bool equalIgnoringCase(const StringImpl* a, const LChar* b) 2015{ 2016 if (!a) 2017 return !b; 2018 if (!b) 2019 return !a; 2020 2021 unsigned length = a->length(); 2022 2023 // Do a faster loop for the case where all the characters are ASCII. 2024 UChar ored = 0; 2025 bool equal = true; 2026 if (a->is8Bit()) { 2027 const LChar* as = a->characters8(); 2028 for (unsigned i = 0; i != length; ++i) { 2029 LChar bc = b[i]; 2030 if (!bc) 2031 return false; 2032 UChar ac = as[i]; 2033 ored |= ac; 2034 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 2035 } 2036 2037 // Do a slower implementation for cases that include non-ASCII characters. 2038 if (ored & ~0x7F) { 2039 equal = true; 2040 for (unsigned i = 0; i != length; ++i) 2041 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 2042 } 2043 2044 return equal && !b[length]; 2045 } 2046 2047 const UChar* as = a->characters16(); 2048 for (unsigned i = 0; i != length; ++i) { 2049 LChar bc = b[i]; 2050 if (!bc) 2051 return false; 2052 UChar ac = as[i]; 2053 ored |= ac; 2054 equal = equal && (toASCIILower(ac) == toASCIILower(bc)); 2055 } 2056 2057 // Do a slower implementation for cases that include non-ASCII characters. 2058 if (ored & ~0x7F) { 2059 equal = true; 2060 for (unsigned i = 0; i != length; ++i) { 2061 equal = equal && (foldCase(as[i]) == foldCase(b[i])); 2062 } 2063 } 2064 2065 return equal && !b[length]; 2066} 2067 2068bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b) 2069{ 2070 ASSERT(a && b); 2071 if (a == b) 2072 return true; 2073 2074 unsigned length = a->length(); 2075 if (length != b->length()) 2076 return false; 2077 2078 if (a->is8Bit()) { 2079 if (b->is8Bit()) 2080 return equalIgnoringCase(a->characters8(), b->characters8(), length); 2081 2082 return equalIgnoringCase(b->characters16(), a->characters8(), length); 2083 } 2084 2085 if (b->is8Bit()) 2086 return equalIgnoringCase(a->characters16(), b->characters8(), length); 2087 2088 return equalIgnoringCase(a->characters16(), b->characters16(), length); 2089} 2090 2091bool equalIgnoringNullity(StringImpl* a, StringImpl* b) 2092{ 2093 if (!a && b && !b->length()) 2094 return true; 2095 if (!b && a && !a->length()) 2096 return true; 2097 return equal(a, b); 2098} 2099 2100size_t StringImpl::sizeInBytes() const 2101{ 2102 size_t size = length(); 2103 if (!is8Bit()) 2104 size *= 2; 2105 return size + sizeof(*this); 2106} 2107 2108} // namespace WTF 2109