1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4 *           (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB.  If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "config.h"
26#include "wtf/text/StringImpl.h"
27
28#include "wtf/DynamicAnnotations.h"
29#include "wtf/LeakAnnotations.h"
30#include "wtf/MainThread.h"
31#include "wtf/OwnPtr.h"
32#include "wtf/PartitionAlloc.h"
33#include "wtf/PassOwnPtr.h"
34#include "wtf/StdLibExtras.h"
35#include "wtf/WTF.h"
36#include "wtf/text/AtomicString.h"
37#include "wtf/text/StringBuffer.h"
38#include "wtf/text/StringHash.h"
39#include "wtf/unicode/CharacterNames.h"
40#include <unicode/translit.h>
41#include <unicode/unistr.h>
42
43#ifdef STRING_STATS
44#include "wtf/DataLog.h"
45#include "wtf/HashMap.h"
46#include "wtf/HashSet.h"
47#include "wtf/ProcessID.h"
48#include "wtf/RefCounted.h"
49#include "wtf/ThreadingPrimitives.h"
50#include <unistd.h>
51#endif
52
53using namespace std;
54
55namespace WTF {
56
57using namespace Unicode;
58
59COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
60
61#ifdef STRING_STATS
62
63static Mutex& statsMutex()
64{
65    DEFINE_STATIC_LOCAL(Mutex, mutex, ());
66    return mutex;
67}
68
69static HashSet<void*>& liveStrings()
70{
71    // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
72    DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
73    return strings;
74}
75
76void addStringForStats(StringImpl* string)
77{
78    MutexLocker locker(statsMutex());
79    liveStrings().add(string);
80}
81
82void removeStringForStats(StringImpl* string)
83{
84    MutexLocker locker(statsMutex());
85    liveStrings().remove(string);
86}
87
88static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
89{
90    const unsigned kMaxSnippetLength = 64;
91    snippet.clear();
92
93    size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
94    if (expectedLength == kMaxSnippetLength)
95        expectedLength += 3; // For the "...".
96    ++expectedLength; // For the terminating '\0'.
97    snippet.reserveCapacity(expectedLength);
98
99    size_t i;
100    for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
101        UChar c = (*string)[i];
102        if (isASCIIPrintable(c))
103            snippet.append(c);
104        else
105            snippet.append('?');
106    }
107    if (i < string->length()) {
108        snippet.append('.');
109        snippet.append('.');
110        snippet.append('.');
111    }
112    snippet.append('\0');
113}
114
115static bool isUnnecessarilyWide(const StringImpl* string)
116{
117    if (string->is8Bit())
118        return false;
119    UChar c = 0;
120    for (unsigned i = 0; i < string->length(); ++i)
121        c |= (*string)[i] >> 8;
122    return !c;
123}
124
125class PerStringStats : public RefCounted<PerStringStats> {
126public:
127    static PassRefPtr<PerStringStats> create()
128    {
129        return adoptRef(new PerStringStats);
130    }
131
132    void add(const StringImpl* string)
133    {
134        ++m_numberOfCopies;
135        if (!m_length) {
136            m_length = string->length();
137            fillWithSnippet(string, m_snippet);
138        }
139        if (string->isAtomic())
140            ++m_numberOfAtomicCopies;
141        if (isUnnecessarilyWide(string))
142            m_unnecessarilyWide = true;
143    }
144
145    size_t totalCharacters() const
146    {
147        return m_numberOfCopies * m_length;
148    }
149
150    void print()
151    {
152        const char* status = "ok";
153        if (m_unnecessarilyWide)
154            status = "16";
155        dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
156    }
157
158    bool m_unnecessarilyWide;
159    unsigned m_numberOfCopies;
160    unsigned m_length;
161    unsigned m_numberOfAtomicCopies;
162    Vector<char> m_snippet;
163
164private:
165    PerStringStats()
166        : m_unnecessarilyWide(false)
167        , m_numberOfCopies(0)
168        , m_length(0)
169        , m_numberOfAtomicCopies(0)
170    {
171    }
172};
173
174bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
175{
176    if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
177        return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
178    if (a->totalCharacters() != b->totalCharacters())
179        return a->totalCharacters() < b->totalCharacters();
180    if (a->m_numberOfCopies != b->m_numberOfCopies)
181        return a->m_numberOfCopies < b->m_numberOfCopies;
182    if (a->m_length != b->m_length)
183        return a->m_length < b->m_length;
184    return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
185}
186
187static void printLiveStringStats(void*)
188{
189    MutexLocker locker(statsMutex());
190    HashSet<void*>& strings = liveStrings();
191
192    HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
193    for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
194        StringImpl* string = static_cast<StringImpl*>(*iter);
195        HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
196        RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
197        value->add(string);
198        stats.set(string, value.release());
199    }
200
201    Vector<RefPtr<PerStringStats> > all;
202    for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
203        all.append(iter->value);
204
205    std::sort(all.begin(), all.end());
206    std::reverse(all.begin(), all.end());
207    for (size_t i = 0; i < 20 && i < all.size(); ++i)
208        all[i]->print();
209}
210
211StringStats StringImpl::m_stringStats;
212
213unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
214
215void StringStats::removeString(StringImpl* string)
216{
217    unsigned length = string->length();
218    --m_totalNumberStrings;
219
220    if (string->is8Bit()) {
221        --m_number8BitStrings;
222        m_total8BitData -= length;
223    } else {
224        --m_number16BitStrings;
225        m_total16BitData -= length;
226    }
227
228    if (!--s_stringRemovesTillPrintStats) {
229        s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
230        printStats();
231    }
232}
233
234void StringStats::printStats()
235{
236    dataLogF("String stats for process id %d:\n", getCurrentProcessID());
237
238    unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
239    double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240    double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
241    dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
242
243    double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
244    double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
245    dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
246
247    double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
248    unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
249    dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
250    unsigned long long totalSavedBytes = m_total8BitData;
251    double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
252    dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
253
254    unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
255    double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
256    dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
257
258    callOnMainThread(printLiveStringStats, 0);
259}
260#endif
261
262void* StringImpl::operator new(size_t size)
263{
264    ASSERT(size == sizeof(StringImpl));
265    return partitionAllocGeneric(Partitions::getBufferPartition(), size);
266}
267
268void StringImpl::operator delete(void* ptr)
269{
270    partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
271}
272
273inline StringImpl::~StringImpl()
274{
275    ASSERT(!isStatic());
276
277    STRING_STATS_REMOVE_STRING(this);
278
279    if (isAtomic())
280        AtomicString::remove(this);
281}
282
283void StringImpl::destroyIfNotStatic()
284{
285    if (!isStatic())
286        delete this;
287}
288
289PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
290{
291    if (!length) {
292        data = 0;
293        return empty();
294    }
295
296    // Allocate a single buffer large enough to contain the StringImpl
297    // struct as well as the data which it contains. This removes one
298    // heap allocation from this call.
299    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
300
301    data = reinterpret_cast<LChar*>(string + 1);
302    return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
303}
304
305PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
306{
307    if (!length) {
308        data = 0;
309        return empty();
310    }
311
312    // Allocate a single buffer large enough to contain the StringImpl
313    // struct as well as the data which it contains. This removes one
314    // heap allocation from this call.
315    StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
316
317    data = reinterpret_cast<UChar*>(string + 1);
318    return adoptRef(new (string) StringImpl(length));
319}
320
321PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
322{
323    ASSERT(originalString->hasOneRef());
324
325    if (!length)
326        return empty();
327
328    bool is8Bit = originalString->is8Bit();
329    // Same as createUninitialized() except here we use realloc.
330    size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
331    originalString->~StringImpl();
332    StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
333    if (is8Bit)
334        return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
335    return adoptRef(new (string) StringImpl(length));
336}
337
338static StaticStringsTable& staticStrings()
339{
340    DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
341    return staticStrings;
342}
343
344#ifndef NDEBUG
345static bool s_allowCreationOfStaticStrings = true;
346#endif
347
348const StaticStringsTable& StringImpl::allStaticStrings()
349{
350    return staticStrings();
351}
352
353void StringImpl::freezeStaticStrings()
354{
355    ASSERT(isMainThread());
356
357#ifndef NDEBUG
358    s_allowCreationOfStaticStrings = false;
359#endif
360}
361
362unsigned StringImpl::m_highestStaticStringLength = 0;
363
364StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
365{
366    ASSERT(s_allowCreationOfStaticStrings);
367    ASSERT(string);
368    ASSERT(length);
369
370    StaticStringsTable::const_iterator it = staticStrings().find(hash);
371    if (it != staticStrings().end()) {
372        ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
373        return it->value;
374    }
375
376    // Allocate a single buffer large enough to contain the StringImpl
377    // struct as well as the data which it contains. This removes one
378    // heap allocation from this call.
379    RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
380    size_t size = sizeof(StringImpl) + length * sizeof(LChar);
381
382    WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
383    StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
384
385    LChar* data = reinterpret_cast<LChar*>(impl + 1);
386    impl = new (impl) StringImpl(length, hash, StaticString);
387    memcpy(data, string, length * sizeof(LChar));
388#ifndef NDEBUG
389    impl->assertHashIsCorrect();
390#endif
391
392    ASSERT(isMainThread());
393    m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
394    staticStrings().add(hash, impl);
395    WTF_ANNOTATE_BENIGN_RACE(impl,
396        "Benign race on the reference counter of a static string created by StringImpl::createStatic");
397
398    return impl;
399}
400
401PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
402{
403    if (!characters || !length)
404        return empty();
405
406    UChar* data;
407    RefPtr<StringImpl> string = createUninitialized(length, data);
408    memcpy(data, characters, length * sizeof(UChar));
409    return string.release();
410}
411
412PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
413{
414    if (!characters || !length)
415        return empty();
416
417    LChar* data;
418    RefPtr<StringImpl> string = createUninitialized(length, data);
419    memcpy(data, characters, length * sizeof(LChar));
420    return string.release();
421}
422
423PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
424{
425    if (!characters || !length)
426        return empty();
427
428    LChar* data;
429    RefPtr<StringImpl> string = createUninitialized(length, data);
430
431    for (size_t i = 0; i < length; ++i) {
432        if (characters[i] & 0xff00)
433            return create(characters, length);
434        data[i] = static_cast<LChar>(characters[i]);
435    }
436
437    return string.release();
438}
439
440PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
441{
442    if (!string)
443        return empty();
444    size_t length = strlen(reinterpret_cast<const char*>(string));
445    RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
446    return create(string, length);
447}
448
449bool StringImpl::containsOnlyWhitespace()
450{
451    // FIXME: The definition of whitespace here includes a number of characters
452    // that are not whitespace from the point of view of RenderText; I wonder if
453    // that's a problem in practice.
454    if (is8Bit()) {
455        for (unsigned i = 0; i < m_length; ++i) {
456            UChar c = characters8()[i];
457            if (!isASCIISpace(c))
458                return false;
459        }
460
461        return true;
462    }
463
464    for (unsigned i = 0; i < m_length; ++i) {
465        UChar c = characters16()[i];
466        if (!isASCIISpace(c))
467            return false;
468    }
469    return true;
470}
471
472PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
473{
474    if (start >= m_length)
475        return empty();
476    unsigned maxLength = m_length - start;
477    if (length >= maxLength) {
478        if (!start)
479            return this;
480        length = maxLength;
481    }
482    if (is8Bit())
483        return create(characters8() + start, length);
484
485    return create(characters16() + start, length);
486}
487
488UChar32 StringImpl::characterStartingAt(unsigned i)
489{
490    if (is8Bit())
491        return characters8()[i];
492    if (U16_IS_SINGLE(characters16()[i]))
493        return characters16()[i];
494    if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
495        return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
496    return 0;
497}
498
499PassRefPtr<StringImpl> StringImpl::lower()
500{
501    // Note: This is a hot function in the Dromaeo benchmark, specifically the
502    // no-op code path up through the first 'return' statement.
503
504    // First scan the string for uppercase and non-ASCII characters:
505    bool noUpper = true;
506    UChar ored = 0;
507    if (is8Bit()) {
508        const LChar* end = characters8() + m_length;
509        for (const LChar* chp = characters8(); chp != end; ++chp) {
510            if (UNLIKELY(isASCIIUpper(*chp)))
511                noUpper = false;
512            ored |= *chp;
513        }
514        // Nothing to do if the string is all ASCII with no uppercase.
515        if (noUpper && !(ored & ~0x7F))
516            return this;
517
518        RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
519        int32_t length = m_length;
520
521        LChar* data8;
522        RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
523
524        if (!(ored & ~0x7F)) {
525            for (int32_t i = 0; i < length; ++i)
526                data8[i] = toASCIILower(characters8()[i]);
527
528            return newImpl.release();
529        }
530
531        // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
532        for (int32_t i = 0; i < length; ++i)
533            data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
534
535        return newImpl.release();
536    }
537
538    const UChar* end = characters16() + m_length;
539    for (const UChar* chp = characters16(); chp != end; ++chp) {
540        if (UNLIKELY(isASCIIUpper(*chp)))
541            noUpper = false;
542        ored |= *chp;
543    }
544    // Nothing to do if the string is all ASCII with no uppercase.
545    if (noUpper && !(ored & ~0x7F))
546        return this;
547
548    RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
549    int32_t length = m_length;
550
551    if (!(ored & ~0x7F)) {
552        UChar* data16;
553        RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
554
555        for (int32_t i = 0; i < length; ++i) {
556            UChar c = characters16()[i];
557            data16[i] = toASCIILower(c);
558        }
559        return newImpl.release();
560    }
561
562    // Do a slower implementation for cases that include non-ASCII characters.
563    UChar* data16;
564    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
565
566    bool error;
567    int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
568    if (!error && realLength == length)
569        return newImpl.release();
570
571    newImpl = createUninitialized(realLength, data16);
572    Unicode::toLower(data16, realLength, characters16(), m_length, &error);
573    if (error)
574        return this;
575    return newImpl.release();
576}
577
578PassRefPtr<StringImpl> StringImpl::upper()
579{
580    // This function could be optimized for no-op cases the way lower() is,
581    // but in empirical testing, few actual calls to upper() are no-ops, so
582    // it wouldn't be worth the extra time for pre-scanning.
583
584    RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
585    int32_t length = m_length;
586
587    if (is8Bit()) {
588        LChar* data8;
589        RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
590
591        // Do a faster loop for the case where all the characters are ASCII.
592        LChar ored = 0;
593        for (int i = 0; i < length; ++i) {
594            LChar c = characters8()[i];
595            ored |= c;
596            data8[i] = toASCIIUpper(c);
597        }
598        if (!(ored & ~0x7F))
599            return newImpl.release();
600
601        // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
602        int numberSharpSCharacters = 0;
603
604        // There are two special cases.
605        //  1. latin-1 characters when converted to upper case are 16 bit characters.
606        //  2. Lower case sharp-S converts to "SS" (two characters)
607        for (int32_t i = 0; i < length; ++i) {
608            LChar c = characters8()[i];
609            if (UNLIKELY(c == smallLetterSharpS))
610                ++numberSharpSCharacters;
611            UChar upper = Unicode::toUpper(c);
612            if (UNLIKELY(upper > 0xff)) {
613                // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
614                goto upconvert;
615            }
616            data8[i] = static_cast<LChar>(upper);
617        }
618
619        if (!numberSharpSCharacters)
620            return newImpl.release();
621
622        // We have numberSSCharacters sharp-s characters, but none of the other special characters.
623        newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
624
625        LChar* dest = data8;
626
627        for (int32_t i = 0; i < length; ++i) {
628            LChar c = characters8()[i];
629            if (c == smallLetterSharpS) {
630                *dest++ = 'S';
631                *dest++ = 'S';
632            } else
633                *dest++ = static_cast<LChar>(Unicode::toUpper(c));
634        }
635
636        return newImpl.release();
637    }
638
639upconvert:
640    RefPtr<StringImpl> upconverted = upconvertedString();
641    const UChar* source16 = upconverted->characters16();
642
643    UChar* data16;
644    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
645
646    // Do a faster loop for the case where all the characters are ASCII.
647    UChar ored = 0;
648    for (int i = 0; i < length; ++i) {
649        UChar c = source16[i];
650        ored |= c;
651        data16[i] = toASCIIUpper(c);
652    }
653    if (!(ored & ~0x7F))
654        return newImpl.release();
655
656    // Do a slower implementation for cases that include non-ASCII characters.
657    bool error;
658    int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
659    if (!error && realLength == length)
660        return newImpl;
661    newImpl = createUninitialized(realLength, data16);
662    Unicode::toUpper(data16, realLength, source16, m_length, &error);
663    if (error)
664        return this;
665    return newImpl.release();
666}
667
668static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
669{
670    if (equalIgnoringCase(localeId, lang))
671        return true;
672    static char localeIdPrefix[4];
673    static const char delimeter[4] = "-_@";
674
675    size_t langLength = strlen(lang);
676    RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
677    strncpy(localeIdPrefix, lang, langLength);
678    for (int i = 0; i < 3; ++i) {
679        localeIdPrefix[langLength] = delimeter[i];
680        // case-insensitive comparison
681        if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
682            return true;
683    }
684    return false;
685}
686
687typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
688
689static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
690{
691    UChar* data16;
692    int32_t targetLength = length;
693    RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
694    do {
695        UErrorCode status = U_ZERO_ERROR;
696        targetLength = converter(data16, targetLength, source16, length, locale, &status);
697        if (U_SUCCESS(status)) {
698            output->truncateAssumingIsolated(targetLength);
699            return output.release();
700        }
701        if (status != U_BUFFER_OVERFLOW_ERROR)
702            return originalString;
703        // Expand the buffer.
704        output = StringImpl::createUninitialized(targetLength, data16);
705    } while (true);
706}
707
708PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
709{
710    // Use the more-optimized code path most of the time.
711    // Only Turkic (tr and az) languages and Lithuanian requires
712    // locale-specific lowercasing rules. Even though CLDR has el-Lower,
713    // it's identical to the locale-agnostic lowercasing. Context-dependent
714    // handling of Greek capital sigma is built into the common lowercasing
715    // function in ICU.
716    const char* localeForConversion = 0;
717    if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
718        localeForConversion = "tr";
719    else if (localeIdMatchesLang(localeIdentifier, "lt"))
720        localeForConversion = "lt";
721    else
722        return lower();
723
724    if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
725        CRASH();
726    int length = m_length;
727
728    RefPtr<StringImpl> upconverted = upconvertedString();
729    const UChar* source16 = upconverted->characters16();
730    return caseConvert(source16, length, u_strToLower, localeForConversion, this);
731}
732
733PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
734{
735    // Use the more-optimized code path most of the time.
736    // Only Turkic (tr and az) languages and Greek require locale-specific
737    // lowercasing rules.
738    icu::UnicodeString transliteratorId;
739    const char* localeForConversion = 0;
740    if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
741        localeForConversion = "tr";
742    else if (localeIdMatchesLang(localeIdentifier, "el"))
743        transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
744    else if (localeIdMatchesLang(localeIdentifier, "lt"))
745        localeForConversion = "lt";
746    else
747        return upper();
748
749    if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
750        CRASH();
751    int length = m_length;
752
753    RefPtr<StringImpl> upconverted = upconvertedString();
754    const UChar* source16 = upconverted->characters16();
755
756    if (localeForConversion)
757        return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
758
759    // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
760    UErrorCode status = U_ZERO_ERROR;
761    OwnPtr<icu::Transliterator> translit =
762        adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
763    if (U_FAILURE(status))
764        return upper();
765
766    // target will be copy-on-write.
767    icu::UnicodeString target(false, source16, length);
768    translit->transliterate(target);
769
770    return create(target.getBuffer(), target.length());
771}
772
773PassRefPtr<StringImpl> StringImpl::fill(UChar character)
774{
775    if (!(character & ~0x7F)) {
776        LChar* data;
777        RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
778        for (unsigned i = 0; i < m_length; ++i)
779            data[i] = character;
780        return newImpl.release();
781    }
782    UChar* data;
783    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
784    for (unsigned i = 0; i < m_length; ++i)
785        data[i] = character;
786    return newImpl.release();
787}
788
789PassRefPtr<StringImpl> StringImpl::foldCase()
790{
791    RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
792    int32_t length = m_length;
793
794    if (is8Bit()) {
795        // Do a faster loop for the case where all the characters are ASCII.
796        LChar* data;
797        RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
798        LChar ored = 0;
799
800        for (int32_t i = 0; i < length; ++i) {
801            LChar c = characters8()[i];
802            data[i] = toASCIILower(c);
803            ored |= c;
804        }
805
806        if (!(ored & ~0x7F))
807            return newImpl.release();
808
809        // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
810        for (int32_t i = 0; i < length; ++i)
811            data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
812
813        return newImpl.release();
814    }
815
816    // Do a faster loop for the case where all the characters are ASCII.
817    UChar* data;
818    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
819    UChar ored = 0;
820    for (int32_t i = 0; i < length; ++i) {
821        UChar c = characters16()[i];
822        ored |= c;
823        data[i] = toASCIILower(c);
824    }
825    if (!(ored & ~0x7F))
826        return newImpl.release();
827
828    // Do a slower implementation for cases that include non-ASCII characters.
829    bool error;
830    int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
831    if (!error && realLength == length)
832        return newImpl.release();
833    newImpl = createUninitialized(realLength, data);
834    Unicode::foldCase(data, realLength, characters16(), m_length, &error);
835    if (error)
836        return this;
837    return newImpl.release();
838}
839
840template <class UCharPredicate>
841inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
842{
843    if (!m_length)
844        return empty();
845
846    unsigned start = 0;
847    unsigned end = m_length - 1;
848
849    // skip white space from start
850    while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
851        ++start;
852
853    // only white space
854    if (start > end)
855        return empty();
856
857    // skip white space from end
858    while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
859        --end;
860
861    if (!start && end == m_length - 1)
862        return this;
863    if (is8Bit())
864        return create(characters8() + start, end + 1 - start);
865    return create(characters16() + start, end + 1 - start);
866}
867
868class UCharPredicate {
869public:
870    inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
871
872    inline bool operator()(UChar ch) const
873    {
874        return m_function(ch);
875    }
876
877private:
878    const CharacterMatchFunctionPtr m_function;
879};
880
881class SpaceOrNewlinePredicate {
882public:
883    inline bool operator()(UChar ch) const
884    {
885        return isSpaceOrNewline(ch);
886    }
887};
888
889PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
890{
891    return stripMatchedCharacters(SpaceOrNewlinePredicate());
892}
893
894PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
895{
896    return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
897}
898
899template <typename CharType>
900ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
901{
902    const CharType* from = characters;
903    const CharType* fromend = from + m_length;
904
905    // Assume the common case will not remove any characters
906    while (from != fromend && !findMatch(*from))
907        ++from;
908    if (from == fromend)
909        return this;
910
911    StringBuffer<CharType> data(m_length);
912    CharType* to = data.characters();
913    unsigned outc = from - characters;
914
915    if (outc)
916        memcpy(to, characters, outc * sizeof(CharType));
917
918    while (true) {
919        while (from != fromend && findMatch(*from))
920            ++from;
921        while (from != fromend && !findMatch(*from))
922            to[outc++] = *from++;
923        if (from == fromend)
924            break;
925    }
926
927    data.shrink(outc);
928
929    return data.release();
930}
931
932PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
933{
934    if (is8Bit())
935        return removeCharacters(characters8(), findMatch);
936    return removeCharacters(characters16(), findMatch);
937}
938
939template <typename CharType, class UCharPredicate>
940inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
941{
942    StringBuffer<CharType> data(m_length);
943
944    const CharType* from = getCharacters<CharType>();
945    const CharType* fromend = from + m_length;
946    int outc = 0;
947    bool changedToSpace = false;
948
949    CharType* to = data.characters();
950
951    if (stripBehavior == StripExtraWhiteSpace) {
952        while (true) {
953            while (from != fromend && predicate(*from)) {
954                if (*from != ' ')
955                    changedToSpace = true;
956                ++from;
957            }
958            while (from != fromend && !predicate(*from))
959                to[outc++] = *from++;
960            if (from != fromend)
961                to[outc++] = ' ';
962            else
963                break;
964        }
965
966        if (outc > 0 && to[outc - 1] == ' ')
967            --outc;
968    } else {
969        for (; from != fromend; ++from) {
970            if (predicate(*from)) {
971                if (*from != ' ')
972                    changedToSpace = true;
973                to[outc++] = ' ';
974            } else {
975                to[outc++] = *from;
976            }
977        }
978    }
979
980    if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
981        return this;
982
983    data.shrink(outc);
984
985    return data.release();
986}
987
988PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
989{
990    if (is8Bit())
991        return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
992    return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
993}
994
995PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
996{
997    if (is8Bit())
998        return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
999    return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
1000}
1001
1002int StringImpl::toIntStrict(bool* ok, int base)
1003{
1004    if (is8Bit())
1005        return charactersToIntStrict(characters8(), m_length, ok, base);
1006    return charactersToIntStrict(characters16(), m_length, ok, base);
1007}
1008
1009unsigned StringImpl::toUIntStrict(bool* ok, int base)
1010{
1011    if (is8Bit())
1012        return charactersToUIntStrict(characters8(), m_length, ok, base);
1013    return charactersToUIntStrict(characters16(), m_length, ok, base);
1014}
1015
1016int64_t StringImpl::toInt64Strict(bool* ok, int base)
1017{
1018    if (is8Bit())
1019        return charactersToInt64Strict(characters8(), m_length, ok, base);
1020    return charactersToInt64Strict(characters16(), m_length, ok, base);
1021}
1022
1023uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1024{
1025    if (is8Bit())
1026        return charactersToUInt64Strict(characters8(), m_length, ok, base);
1027    return charactersToUInt64Strict(characters16(), m_length, ok, base);
1028}
1029
1030intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1031{
1032    if (is8Bit())
1033        return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1034    return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1035}
1036
1037int StringImpl::toInt(bool* ok)
1038{
1039    if (is8Bit())
1040        return charactersToInt(characters8(), m_length, ok);
1041    return charactersToInt(characters16(), m_length, ok);
1042}
1043
1044unsigned StringImpl::toUInt(bool* ok)
1045{
1046    if (is8Bit())
1047        return charactersToUInt(characters8(), m_length, ok);
1048    return charactersToUInt(characters16(), m_length, ok);
1049}
1050
1051int64_t StringImpl::toInt64(bool* ok)
1052{
1053    if (is8Bit())
1054        return charactersToInt64(characters8(), m_length, ok);
1055    return charactersToInt64(characters16(), m_length, ok);
1056}
1057
1058uint64_t StringImpl::toUInt64(bool* ok)
1059{
1060    if (is8Bit())
1061        return charactersToUInt64(characters8(), m_length, ok);
1062    return charactersToUInt64(characters16(), m_length, ok);
1063}
1064
1065intptr_t StringImpl::toIntPtr(bool* ok)
1066{
1067    if (is8Bit())
1068        return charactersToIntPtr(characters8(), m_length, ok);
1069    return charactersToIntPtr(characters16(), m_length, ok);
1070}
1071
1072double StringImpl::toDouble(bool* ok)
1073{
1074    if (is8Bit())
1075        return charactersToDouble(characters8(), m_length, ok);
1076    return charactersToDouble(characters16(), m_length, ok);
1077}
1078
1079float StringImpl::toFloat(bool* ok)
1080{
1081    if (is8Bit())
1082        return charactersToFloat(characters8(), m_length, ok);
1083    return charactersToFloat(characters16(), m_length, ok);
1084}
1085
1086bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1087{
1088    while (length--) {
1089        LChar bc = *b++;
1090        if (foldCase(*a++) != foldCase(bc))
1091            return false;
1092    }
1093    return true;
1094}
1095
1096bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1097{
1098    while (length--) {
1099        LChar bc = *b++;
1100        if (foldCase(*a++) != foldCase(bc))
1101            return false;
1102    }
1103    return true;
1104}
1105
1106size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1107{
1108    if (is8Bit())
1109        return WTF::find(characters8(), m_length, matchFunction, start);
1110    return WTF::find(characters16(), m_length, matchFunction, start);
1111}
1112
1113size_t StringImpl::find(const LChar* matchString, unsigned index)
1114{
1115    // Check for null or empty string to match against
1116    if (!matchString)
1117        return kNotFound;
1118    size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1119    RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1120    unsigned matchLength = matchStringLength;
1121    if (!matchLength)
1122        return min(index, length());
1123
1124    // Optimization 1: fast case for strings of length 1.
1125    if (matchLength == 1)
1126        return WTF::find(characters16(), length(), *matchString, index);
1127
1128    // Check index & matchLength are in range.
1129    if (index > length())
1130        return kNotFound;
1131    unsigned searchLength = length() - index;
1132    if (matchLength > searchLength)
1133        return kNotFound;
1134    // delta is the number of additional times to test; delta == 0 means test only once.
1135    unsigned delta = searchLength - matchLength;
1136
1137    const UChar* searchCharacters = characters16() + index;
1138
1139    // Optimization 2: keep a running hash of the strings,
1140    // only call equal if the hashes match.
1141    unsigned searchHash = 0;
1142    unsigned matchHash = 0;
1143    for (unsigned i = 0; i < matchLength; ++i) {
1144        searchHash += searchCharacters[i];
1145        matchHash += matchString[i];
1146    }
1147
1148    unsigned i = 0;
1149    // keep looping until we match
1150    while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1151        if (i == delta)
1152            return kNotFound;
1153        searchHash += searchCharacters[i + matchLength];
1154        searchHash -= searchCharacters[i];
1155        ++i;
1156    }
1157    return index + i;
1158}
1159
1160template<typename CharType>
1161ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1162{
1163    // delta is the number of additional times to test; delta == 0 means test only once.
1164    unsigned delta = searchLength - matchLength;
1165
1166    unsigned i = 0;
1167    while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1168        if (i == delta)
1169            return kNotFound;
1170        ++i;
1171    }
1172    return index + i;
1173}
1174
1175size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1176{
1177    // Check for null or empty string to match against
1178    if (!matchString)
1179        return kNotFound;
1180    size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1181    RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1182    unsigned matchLength = matchStringLength;
1183    if (!matchLength)
1184        return min(index, length());
1185
1186    // Check index & matchLength are in range.
1187    if (index > length())
1188        return kNotFound;
1189    unsigned searchLength = length() - index;
1190    if (matchLength > searchLength)
1191        return kNotFound;
1192
1193    if (is8Bit())
1194        return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1195    return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1196}
1197
1198template <typename SearchCharacterType, typename MatchCharacterType>
1199ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1200{
1201    // Optimization: keep a running hash of the strings,
1202    // only call equal() if the hashes match.
1203
1204    // delta is the number of additional times to test; delta == 0 means test only once.
1205    unsigned delta = searchLength - matchLength;
1206
1207    unsigned searchHash = 0;
1208    unsigned matchHash = 0;
1209
1210    for (unsigned i = 0; i < matchLength; ++i) {
1211        searchHash += searchCharacters[i];
1212        matchHash += matchCharacters[i];
1213    }
1214
1215    unsigned i = 0;
1216    // keep looping until we match
1217    while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1218        if (i == delta)
1219            return kNotFound;
1220        searchHash += searchCharacters[i + matchLength];
1221        searchHash -= searchCharacters[i];
1222        ++i;
1223    }
1224    return index + i;
1225}
1226
1227size_t StringImpl::find(StringImpl* matchString)
1228{
1229    // Check for null string to match against
1230    if (UNLIKELY(!matchString))
1231        return kNotFound;
1232    unsigned matchLength = matchString->length();
1233
1234    // Optimization 1: fast case for strings of length 1.
1235    if (matchLength == 1) {
1236        if (is8Bit()) {
1237            if (matchString->is8Bit())
1238                return WTF::find(characters8(), length(), matchString->characters8()[0]);
1239            return WTF::find(characters8(), length(), matchString->characters16()[0]);
1240        }
1241        if (matchString->is8Bit())
1242            return WTF::find(characters16(), length(), matchString->characters8()[0]);
1243        return WTF::find(characters16(), length(), matchString->characters16()[0]);
1244    }
1245
1246    // Check matchLength is in range.
1247    if (matchLength > length())
1248        return kNotFound;
1249
1250    // Check for empty string to match against
1251    if (UNLIKELY(!matchLength))
1252        return 0;
1253
1254    if (is8Bit()) {
1255        if (matchString->is8Bit())
1256            return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1257        return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1258    }
1259
1260    if (matchString->is8Bit())
1261        return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1262
1263    return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1264}
1265
1266size_t StringImpl::find(StringImpl* matchString, unsigned index)
1267{
1268    // Check for null or empty string to match against
1269    if (UNLIKELY(!matchString))
1270        return kNotFound;
1271
1272    unsigned matchLength = matchString->length();
1273
1274    // Optimization 1: fast case for strings of length 1.
1275    if (matchLength == 1) {
1276        if (is8Bit())
1277            return WTF::find(characters8(), length(), (*matchString)[0], index);
1278        return WTF::find(characters16(), length(), (*matchString)[0], index);
1279    }
1280
1281    if (UNLIKELY(!matchLength))
1282        return min(index, length());
1283
1284    // Check index & matchLength are in range.
1285    if (index > length())
1286        return kNotFound;
1287    unsigned searchLength = length() - index;
1288    if (matchLength > searchLength)
1289        return kNotFound;
1290
1291    if (is8Bit()) {
1292        if (matchString->is8Bit())
1293            return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1294        return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1295    }
1296
1297    if (matchString->is8Bit())
1298        return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1299
1300    return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1301}
1302
1303template <typename SearchCharacterType, typename MatchCharacterType>
1304ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1305{
1306    // delta is the number of additional times to test; delta == 0 means test only once.
1307    unsigned delta = searchLength - matchLength;
1308
1309    unsigned i = 0;
1310    // keep looping until we match
1311    while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1312        if (i == delta)
1313            return kNotFound;
1314        ++i;
1315    }
1316    return index + i;
1317}
1318
1319size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1320{
1321    // Check for null or empty string to match against
1322    if (!matchString)
1323        return kNotFound;
1324    unsigned matchLength = matchString->length();
1325    if (!matchLength)
1326        return min(index, length());
1327
1328    // Check index & matchLength are in range.
1329    if (index > length())
1330        return kNotFound;
1331    unsigned searchLength = length() - index;
1332    if (matchLength > searchLength)
1333        return kNotFound;
1334
1335    if (is8Bit()) {
1336        if (matchString->is8Bit())
1337            return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1338        return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1339    }
1340
1341    if (matchString->is8Bit())
1342        return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1343
1344    return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1345}
1346
1347size_t StringImpl::findNextLineStart(unsigned index)
1348{
1349    if (is8Bit())
1350        return WTF::findNextLineStart(characters8(), m_length, index);
1351    return WTF::findNextLineStart(characters16(), m_length, index);
1352}
1353
1354size_t StringImpl::count(LChar c) const
1355{
1356    int count = 0;
1357    if (is8Bit()) {
1358        for (size_t i = 0; i < m_length; ++i)
1359            count += characters8()[i] == c;
1360    } else {
1361        for (size_t i = 0; i < m_length; ++i)
1362            count += characters16()[i] == c;
1363    }
1364    return count;
1365}
1366
1367size_t StringImpl::reverseFind(UChar c, unsigned index)
1368{
1369    if (is8Bit())
1370        return WTF::reverseFind(characters8(), m_length, c, index);
1371    return WTF::reverseFind(characters16(), m_length, c, index);
1372}
1373
1374template <typename SearchCharacterType, typename MatchCharacterType>
1375ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1376{
1377    // Optimization: keep a running hash of the strings,
1378    // only call equal if the hashes match.
1379
1380    // delta is the number of additional times to test; delta == 0 means test only once.
1381    unsigned delta = min(index, length - matchLength);
1382
1383    unsigned searchHash = 0;
1384    unsigned matchHash = 0;
1385    for (unsigned i = 0; i < matchLength; ++i) {
1386        searchHash += searchCharacters[delta + i];
1387        matchHash += matchCharacters[i];
1388    }
1389
1390    // keep looping until we match
1391    while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1392        if (!delta)
1393            return kNotFound;
1394        --delta;
1395        searchHash -= searchCharacters[delta + matchLength];
1396        searchHash += searchCharacters[delta];
1397    }
1398    return delta;
1399}
1400
1401size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1402{
1403    // Check for null or empty string to match against
1404    if (!matchString)
1405        return kNotFound;
1406    unsigned matchLength = matchString->length();
1407    unsigned ourLength = length();
1408    if (!matchLength)
1409        return min(index, ourLength);
1410
1411    // Optimization 1: fast case for strings of length 1.
1412    if (matchLength == 1) {
1413        if (is8Bit())
1414            return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1415        return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1416    }
1417
1418    // Check index & matchLength are in range.
1419    if (matchLength > ourLength)
1420        return kNotFound;
1421
1422    if (is8Bit()) {
1423        if (matchString->is8Bit())
1424            return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1425        return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1426    }
1427
1428    if (matchString->is8Bit())
1429        return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1430
1431    return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1432}
1433
1434template <typename SearchCharacterType, typename MatchCharacterType>
1435ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1436{
1437    // delta is the number of additional times to test; delta == 0 means test only once.
1438    unsigned delta = min(index, length - matchLength);
1439
1440    // keep looping until we match
1441    while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1442        if (!delta)
1443            return kNotFound;
1444        --delta;
1445    }
1446    return delta;
1447}
1448
1449size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1450{
1451    // Check for null or empty string to match against
1452    if (!matchString)
1453        return kNotFound;
1454    unsigned matchLength = matchString->length();
1455    unsigned ourLength = length();
1456    if (!matchLength)
1457        return min(index, ourLength);
1458
1459    // Check index & matchLength are in range.
1460    if (matchLength > ourLength)
1461        return kNotFound;
1462
1463    if (is8Bit()) {
1464        if (matchString->is8Bit())
1465            return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1466        return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1467    }
1468
1469    if (matchString->is8Bit())
1470        return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1471
1472    return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1473}
1474
1475ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1476{
1477    ASSERT(stringImpl);
1478    ASSERT(matchLength <= stringImpl->length());
1479    ASSERT(startOffset + matchLength <= stringImpl->length());
1480
1481    if (caseSensitive) {
1482        if (stringImpl->is8Bit())
1483            return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1484        return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1485    }
1486    if (stringImpl->is8Bit())
1487        return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1488    return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1489}
1490
1491bool StringImpl::startsWith(UChar character) const
1492{
1493    return m_length && (*this)[0] == character;
1494}
1495
1496bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1497{
1498    ASSERT(matchLength);
1499    if (matchLength > length())
1500        return false;
1501    return equalInner(this, 0, matchString, matchLength, caseSensitive);
1502}
1503
1504bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1505{
1506    ASSERT(matchString);
1507    if (m_length >= matchString->m_length) {
1508        unsigned start = m_length - matchString->m_length;
1509        return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1510    }
1511    return false;
1512}
1513
1514bool StringImpl::endsWith(UChar character) const
1515{
1516    return m_length && (*this)[m_length - 1] == character;
1517}
1518
1519bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1520{
1521    ASSERT(matchLength);
1522    if (matchLength > length())
1523        return false;
1524    unsigned startOffset = length() - matchLength;
1525    return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1526}
1527
1528PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1529{
1530    if (oldC == newC)
1531        return this;
1532
1533    if (find(oldC) == kNotFound)
1534        return this;
1535
1536    unsigned i;
1537    if (is8Bit()) {
1538        if (newC <= 0xff) {
1539            LChar* data;
1540            LChar oldChar = static_cast<LChar>(oldC);
1541            LChar newChar = static_cast<LChar>(newC);
1542
1543            RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1544
1545            for (i = 0; i != m_length; ++i) {
1546                LChar ch = characters8()[i];
1547                if (ch == oldChar)
1548                    ch = newChar;
1549                data[i] = ch;
1550            }
1551            return newImpl.release();
1552        }
1553
1554        // There is the possibility we need to up convert from 8 to 16 bit,
1555        // create a 16 bit string for the result.
1556        UChar* data;
1557        RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1558
1559        for (i = 0; i != m_length; ++i) {
1560            UChar ch = characters8()[i];
1561            if (ch == oldC)
1562                ch = newC;
1563            data[i] = ch;
1564        }
1565
1566        return newImpl.release();
1567    }
1568
1569    UChar* data;
1570    RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1571
1572    for (i = 0; i != m_length; ++i) {
1573        UChar ch = characters16()[i];
1574        if (ch == oldC)
1575            ch = newC;
1576        data[i] = ch;
1577    }
1578    return newImpl.release();
1579}
1580
1581PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1582{
1583    position = min(position, length());
1584    lengthToReplace = min(lengthToReplace, length() - position);
1585    unsigned lengthToInsert = str ? str->length() : 0;
1586    if (!lengthToReplace && !lengthToInsert)
1587        return this;
1588
1589    RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1590
1591    if (is8Bit() && (!str || str->is8Bit())) {
1592        LChar* data;
1593        RefPtr<StringImpl> newImpl =
1594        createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1595        memcpy(data, characters8(), position * sizeof(LChar));
1596        if (str)
1597            memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1598        memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1599               (length() - position - lengthToReplace) * sizeof(LChar));
1600        return newImpl.release();
1601    }
1602    UChar* data;
1603    RefPtr<StringImpl> newImpl =
1604        createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1605    if (is8Bit())
1606        for (unsigned i = 0; i < position; ++i)
1607            data[i] = characters8()[i];
1608    else
1609        memcpy(data, characters16(), position * sizeof(UChar));
1610    if (str) {
1611        if (str->is8Bit())
1612            for (unsigned i = 0; i < lengthToInsert; ++i)
1613                data[i + position] = str->characters8()[i];
1614        else
1615            memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1616    }
1617    if (is8Bit()) {
1618        for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1619            data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1620    } else {
1621        memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1622            (length() - position - lengthToReplace) * sizeof(UChar));
1623    }
1624    return newImpl.release();
1625}
1626
1627PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1628{
1629    if (!replacement)
1630        return this;
1631
1632    if (replacement->is8Bit())
1633        return replace(pattern, replacement->characters8(), replacement->length());
1634
1635    return replace(pattern, replacement->characters16(), replacement->length());
1636}
1637
1638PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1639{
1640    ASSERT(replacement);
1641
1642    size_t srcSegmentStart = 0;
1643    unsigned matchCount = 0;
1644
1645    // Count the matches.
1646    while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1647        ++matchCount;
1648        ++srcSegmentStart;
1649    }
1650
1651    // If we have 0 matches then we don't have to do any more work.
1652    if (!matchCount)
1653        return this;
1654
1655    RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1656
1657    unsigned replaceSize = matchCount * repStrLength;
1658    unsigned newSize = m_length - matchCount;
1659    RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1660
1661    newSize += replaceSize;
1662
1663    // Construct the new data.
1664    size_t srcSegmentEnd;
1665    unsigned srcSegmentLength;
1666    srcSegmentStart = 0;
1667    unsigned dstOffset = 0;
1668
1669    if (is8Bit()) {
1670        LChar* data;
1671        RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1672
1673        while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1674            srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1675            memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1676            dstOffset += srcSegmentLength;
1677            memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1678            dstOffset += repStrLength;
1679            srcSegmentStart = srcSegmentEnd + 1;
1680        }
1681
1682        srcSegmentLength = m_length - srcSegmentStart;
1683        memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1684
1685        ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1686
1687        return newImpl.release();
1688    }
1689
1690    UChar* data;
1691    RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1692
1693    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1694        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1695        memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1696
1697        dstOffset += srcSegmentLength;
1698        for (unsigned i = 0; i < repStrLength; ++i)
1699            data[i + dstOffset] = replacement[i];
1700
1701        dstOffset += repStrLength;
1702        srcSegmentStart = srcSegmentEnd + 1;
1703    }
1704
1705    srcSegmentLength = m_length - srcSegmentStart;
1706    memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1707
1708    ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1709
1710    return newImpl.release();
1711}
1712
1713PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1714{
1715    ASSERT(replacement);
1716
1717    size_t srcSegmentStart = 0;
1718    unsigned matchCount = 0;
1719
1720    // Count the matches.
1721    while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1722        ++matchCount;
1723        ++srcSegmentStart;
1724    }
1725
1726    // If we have 0 matches then we don't have to do any more work.
1727    if (!matchCount)
1728        return this;
1729
1730    RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1731
1732    unsigned replaceSize = matchCount * repStrLength;
1733    unsigned newSize = m_length - matchCount;
1734    RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1735
1736    newSize += replaceSize;
1737
1738    // Construct the new data.
1739    size_t srcSegmentEnd;
1740    unsigned srcSegmentLength;
1741    srcSegmentStart = 0;
1742    unsigned dstOffset = 0;
1743
1744    if (is8Bit()) {
1745        UChar* data;
1746        RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1747
1748        while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1749            srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1750            for (unsigned i = 0; i < srcSegmentLength; ++i)
1751                data[i + dstOffset] = characters8()[i + srcSegmentStart];
1752
1753            dstOffset += srcSegmentLength;
1754            memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1755
1756            dstOffset += repStrLength;
1757            srcSegmentStart = srcSegmentEnd + 1;
1758        }
1759
1760        srcSegmentLength = m_length - srcSegmentStart;
1761        for (unsigned i = 0; i < srcSegmentLength; ++i)
1762            data[i + dstOffset] = characters8()[i + srcSegmentStart];
1763
1764        ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1765
1766        return newImpl.release();
1767    }
1768
1769    UChar* data;
1770    RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1771
1772    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1773        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1774        memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1775
1776        dstOffset += srcSegmentLength;
1777        memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1778
1779        dstOffset += repStrLength;
1780        srcSegmentStart = srcSegmentEnd + 1;
1781    }
1782
1783    srcSegmentLength = m_length - srcSegmentStart;
1784    memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1785
1786    ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1787
1788    return newImpl.release();
1789}
1790
1791PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1792{
1793    if (!pattern || !replacement)
1794        return this;
1795
1796    unsigned patternLength = pattern->length();
1797    if (!patternLength)
1798        return this;
1799
1800    unsigned repStrLength = replacement->length();
1801    size_t srcSegmentStart = 0;
1802    unsigned matchCount = 0;
1803
1804    // Count the matches.
1805    while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1806        ++matchCount;
1807        srcSegmentStart += patternLength;
1808    }
1809
1810    // If we have 0 matches, we don't have to do any more work
1811    if (!matchCount)
1812        return this;
1813
1814    unsigned newSize = m_length - matchCount * patternLength;
1815    RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1816
1817    RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1818
1819    newSize += matchCount * repStrLength;
1820
1821
1822    // Construct the new data
1823    size_t srcSegmentEnd;
1824    unsigned srcSegmentLength;
1825    srcSegmentStart = 0;
1826    unsigned dstOffset = 0;
1827    bool srcIs8Bit = is8Bit();
1828    bool replacementIs8Bit = replacement->is8Bit();
1829
1830    // There are 4 cases:
1831    // 1. This and replacement are both 8 bit.
1832    // 2. This and replacement are both 16 bit.
1833    // 3. This is 8 bit and replacement is 16 bit.
1834    // 4. This is 16 bit and replacement is 8 bit.
1835    if (srcIs8Bit && replacementIs8Bit) {
1836        // Case 1
1837        LChar* data;
1838        RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1839        while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1840            srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1841            memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1842            dstOffset += srcSegmentLength;
1843            memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1844            dstOffset += repStrLength;
1845            srcSegmentStart = srcSegmentEnd + patternLength;
1846        }
1847
1848        srcSegmentLength = m_length - srcSegmentStart;
1849        memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1850
1851        ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1852
1853        return newImpl.release();
1854    }
1855
1856    UChar* data;
1857    RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1858    while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859        srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1860        if (srcIs8Bit) {
1861            // Case 3.
1862            for (unsigned i = 0; i < srcSegmentLength; ++i)
1863                data[i + dstOffset] = characters8()[i + srcSegmentStart];
1864        } else {
1865            // Case 2 & 4.
1866            memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1867        }
1868        dstOffset += srcSegmentLength;
1869        if (replacementIs8Bit) {
1870            // Cases 2 & 3.
1871            for (unsigned i = 0; i < repStrLength; ++i)
1872                data[i + dstOffset] = replacement->characters8()[i];
1873        } else {
1874            // Case 4
1875            memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1876        }
1877        dstOffset += repStrLength;
1878        srcSegmentStart = srcSegmentEnd + patternLength;
1879    }
1880
1881    srcSegmentLength = m_length - srcSegmentStart;
1882    if (srcIs8Bit) {
1883        // Case 3.
1884        for (unsigned i = 0; i < srcSegmentLength; ++i)
1885            data[i + dstOffset] = characters8()[i + srcSegmentStart];
1886    } else {
1887        // Cases 2 & 4.
1888        memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1889    }
1890
1891    ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1892
1893    return newImpl.release();
1894}
1895
1896PassRefPtr<StringImpl> StringImpl::upconvertedString()
1897{
1898    if (is8Bit())
1899        return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1900    return this;
1901}
1902
1903static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1904{
1905    unsigned aLength = a->length();
1906    unsigned bLength = b->length();
1907    if (aLength != bLength)
1908        return false;
1909
1910    if (a->is8Bit()) {
1911        if (b->is8Bit())
1912            return equal(a->characters8(), b->characters8(), aLength);
1913
1914        return equal(a->characters8(), b->characters16(), aLength);
1915    }
1916
1917    if (b->is8Bit())
1918        return equal(a->characters16(), b->characters8(), aLength);
1919
1920    return equal(a->characters16(), b->characters16(), aLength);
1921}
1922
1923bool equal(const StringImpl* a, const StringImpl* b)
1924{
1925    if (a == b)
1926        return true;
1927    if (!a || !b)
1928        return false;
1929    if (a->isAtomic() && b->isAtomic())
1930        return false;
1931
1932    return stringImplContentEqual(a, b);
1933}
1934
1935template <typename CharType>
1936inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1937{
1938    if (!a)
1939        return !b;
1940    if (!b)
1941        return false;
1942
1943    if (a->length() != length)
1944        return false;
1945    if (a->is8Bit())
1946        return equal(a->characters8(), b, length);
1947    return equal(a->characters16(), b, length);
1948}
1949
1950bool equal(const StringImpl* a, const LChar* b, unsigned length)
1951{
1952    return equalInternal(a, b, length);
1953}
1954
1955bool equal(const StringImpl* a, const UChar* b, unsigned length)
1956{
1957    return equalInternal(a, b, length);
1958}
1959
1960bool equal(const StringImpl* a, const LChar* b)
1961{
1962    if (!a)
1963        return !b;
1964    if (!b)
1965        return !a;
1966
1967    unsigned length = a->length();
1968
1969    if (a->is8Bit()) {
1970        const LChar* aPtr = a->characters8();
1971        for (unsigned i = 0; i != length; ++i) {
1972            LChar bc = b[i];
1973            LChar ac = aPtr[i];
1974            if (!bc)
1975                return false;
1976            if (ac != bc)
1977                return false;
1978        }
1979
1980        return !b[length];
1981    }
1982
1983    const UChar* aPtr = a->characters16();
1984    for (unsigned i = 0; i != length; ++i) {
1985        LChar bc = b[i];
1986        if (!bc)
1987            return false;
1988        if (aPtr[i] != bc)
1989            return false;
1990    }
1991
1992    return !b[length];
1993}
1994
1995bool equalNonNull(const StringImpl* a, const StringImpl* b)
1996{
1997    ASSERT(a && b);
1998    if (a == b)
1999        return true;
2000
2001    return stringImplContentEqual(a, b);
2002}
2003
2004bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2005{
2006    if (a == b)
2007        return true;
2008    if (!a || !b)
2009        return false;
2010
2011    return CaseFoldingHash::equal(a, b);
2012}
2013
2014bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2015{
2016    if (!a)
2017        return !b;
2018    if (!b)
2019        return !a;
2020
2021    unsigned length = a->length();
2022
2023    // Do a faster loop for the case where all the characters are ASCII.
2024    UChar ored = 0;
2025    bool equal = true;
2026    if (a->is8Bit()) {
2027        const LChar* as = a->characters8();
2028        for (unsigned i = 0; i != length; ++i) {
2029            LChar bc = b[i];
2030            if (!bc)
2031                return false;
2032            UChar ac = as[i];
2033            ored |= ac;
2034            equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2035        }
2036
2037        // Do a slower implementation for cases that include non-ASCII characters.
2038        if (ored & ~0x7F) {
2039            equal = true;
2040            for (unsigned i = 0; i != length; ++i)
2041                equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2042        }
2043
2044        return equal && !b[length];
2045    }
2046
2047    const UChar* as = a->characters16();
2048    for (unsigned i = 0; i != length; ++i) {
2049        LChar bc = b[i];
2050        if (!bc)
2051            return false;
2052        UChar ac = as[i];
2053        ored |= ac;
2054        equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2055    }
2056
2057    // Do a slower implementation for cases that include non-ASCII characters.
2058    if (ored & ~0x7F) {
2059        equal = true;
2060        for (unsigned i = 0; i != length; ++i) {
2061            equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2062        }
2063    }
2064
2065    return equal && !b[length];
2066}
2067
2068bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2069{
2070    ASSERT(a && b);
2071    if (a == b)
2072        return true;
2073
2074    unsigned length = a->length();
2075    if (length != b->length())
2076        return false;
2077
2078    if (a->is8Bit()) {
2079        if (b->is8Bit())
2080            return equalIgnoringCase(a->characters8(), b->characters8(), length);
2081
2082        return equalIgnoringCase(b->characters16(), a->characters8(), length);
2083    }
2084
2085    if (b->is8Bit())
2086        return equalIgnoringCase(a->characters16(), b->characters8(), length);
2087
2088    return equalIgnoringCase(a->characters16(), b->characters16(), length);
2089}
2090
2091bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2092{
2093    if (!a && b && !b->length())
2094        return true;
2095    if (!b && a && !a->length())
2096        return true;
2097    return equal(a, b);
2098}
2099
2100size_t StringImpl::sizeInBytes() const
2101{
2102    size_t size = length();
2103    if (!is8Bit())
2104        size *= 2;
2105    return size + sizeof(*this);
2106}
2107
2108} // namespace WTF
2109