1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 * Copyright (C) 2012 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB.  If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "AtomicString.h"
25
26#include "StringHash.h"
27#include "wtf/HashSet.h"
28#include "wtf/WTFThreadData.h"
29#include "wtf/dtoa.h"
30#include "wtf/text/IntegerToStringConversion.h"
31#include "wtf/unicode/UTF8.h"
32
33namespace WTF {
34
35using namespace Unicode;
36
37COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
38
39class AtomicStringTable {
40    WTF_MAKE_NONCOPYABLE(AtomicStringTable);
41public:
42    static AtomicStringTable* create(WTFThreadData& data)
43    {
44        data.m_atomicStringTable = new AtomicStringTable;
45        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
46        data.m_atomicStringTable->addStaticStrings();
47        return data.m_atomicStringTable;
48    }
49
50    StringImpl* addStringImpl(StringImpl* string)
51    {
52        if (!string->length())
53            return StringImpl::empty();
54
55        StringImpl* result = *m_table.add(string).storedValue;
56
57        if (!result->isAtomic())
58            result->setIsAtomic(true);
59
60        ASSERT(!string->isStatic() || result->isStatic());
61        return result;
62    }
63
64    HashSet<StringImpl*>& table()
65    {
66        return m_table;
67    }
68
69private:
70    AtomicStringTable() { }
71
72    void addStaticStrings()
73    {
74        const StaticStringsTable& staticStrings = StringImpl::allStaticStrings();
75
76        StaticStringsTable::const_iterator it = staticStrings.begin();
77        for (; it != staticStrings.end(); ++it) {
78            addStringImpl(it->value);
79        }
80    }
81
82    static void destroy(AtomicStringTable* table)
83    {
84        HashSet<StringImpl*>::iterator end = table->m_table.end();
85        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) {
86            StringImpl* string = *iter;
87            if (!string->isStatic()) {
88                ASSERT(string->isAtomic());
89                string->setIsAtomic(false);
90            }
91        }
92        delete table;
93    }
94
95    HashSet<StringImpl*> m_table;
96};
97
98static inline AtomicStringTable& atomicStringTable()
99{
100    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
101    WTFThreadData& data = wtfThreadData();
102    AtomicStringTable* table = data.atomicStringTable();
103    if (UNLIKELY(!table))
104        table = AtomicStringTable::create(data);
105    return *table;
106}
107
108static inline HashSet<StringImpl*>& atomicStrings()
109{
110    return atomicStringTable().table();
111}
112
113template<typename T, typename HashTranslator>
114static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
115{
116    HashSet<StringImpl*>::AddResult addResult = atomicStrings().add<HashTranslator>(value);
117
118    // If the string is newly-translated, then we need to adopt it.
119    // The boolean in the pair tells us if that is so.
120    return addResult.isNewEntry ? adoptRef(*addResult.storedValue) : *addResult.storedValue;
121}
122
123PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
124{
125    if (!c)
126        return nullptr;
127    if (!*c)
128        return StringImpl::empty();
129
130    return add(c, strlen(reinterpret_cast<const char*>(c)));
131}
132
// Lookup key describing a run of characters of a given width: a pointer to the
// first character plus a count. Instances are brace-initialized in this file,
// so the member order must not change.
template<typename CharacterType>
struct HashTranslatorCharBuffer {
    // First character of the run.
    const CharacterType* s;
    // Number of characters in the run.
    unsigned length;
};
138
139typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
140struct UCharBufferTranslator {
141    static unsigned hash(const UCharBuffer& buf)
142    {
143        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
144    }
145
146    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
147    {
148        return WTF::equal(str, buf.s, buf.length);
149    }
150
151    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
152    {
153        location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
154        location->setHash(hash);
155        location->setIsAtomic(true);
156    }
157};
158
// Lookup key for characters whose hash is already known. Instances are
// brace-initialized in this file, so the member order must not change.
template<typename CharacterType>
struct HashAndCharacters {
    // Precomputed hash of the characters below.
    unsigned hash;
    // First character of the run.
    const CharacterType* characters;
    // Number of characters in the run.
    unsigned length;
};
165
166template<typename CharacterType>
167struct HashAndCharactersTranslator {
168    static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
169    {
170        ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
171        return buffer.hash;
172    }
173
174    static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
175    {
176        return WTF::equal(string, buffer.characters, buffer.length);
177    }
178
179    static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
180    {
181        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
182        location->setHash(hash);
183        location->setIsAtomic(true);
184    }
185};
186
// Lookup key for UTF-8 encoded input. fromUTF8Internal() fills in every field.
struct HashAndUTF8Characters {
    // Precomputed hash of the decoded string.
    unsigned hash;
    // Start of the UTF-8 data.
    const char* characters;
    // Length of the UTF-8 data, in chars.
    unsigned length;
    // Length the data has once converted to UTF-16. Equal to |length| when the
    // data is pure ASCII.
    unsigned utf16Length;
};
193
194struct HashAndUTF8CharactersTranslator {
195    static unsigned hash(const HashAndUTF8Characters& buffer)
196    {
197        return buffer.hash;
198    }
199
200    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
201    {
202        if (buffer.utf16Length != string->length())
203            return false;
204
205        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
206        if (buffer.utf16Length != buffer.length) {
207            if (string->is8Bit()) {
208                const LChar* characters8 = string->characters8();
209                return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
210            }
211            const UChar* characters16 = string->characters16();
212            return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
213        }
214
215        if (string->is8Bit()) {
216            const LChar* stringCharacters = string->characters8();
217
218            for (unsigned i = 0; i < buffer.length; ++i) {
219                ASSERT(isASCII(buffer.characters[i]));
220                if (stringCharacters[i] != buffer.characters[i])
221                    return false;
222            }
223
224            return true;
225        }
226
227        const UChar* stringCharacters = string->characters16();
228
229        for (unsigned i = 0; i < buffer.length; ++i) {
230            ASSERT(isASCII(buffer.characters[i]));
231            if (stringCharacters[i] != buffer.characters[i])
232                return false;
233        }
234
235        return true;
236    }
237
238    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
239    {
240        UChar* target;
241        RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
242
243        bool isAllASCII;
244        const char* source = buffer.characters;
245        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
246            ASSERT_NOT_REACHED();
247
248        if (isAllASCII)
249            newString = StringImpl::create(buffer.characters, buffer.length);
250
251        location = newString.release().leakRef();
252        location->setHash(hash);
253        location->setIsAtomic(true);
254    }
255};
256
257PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
258{
259    if (!s)
260        return nullptr;
261
262    if (!length)
263        return StringImpl::empty();
264
265    UCharBuffer buffer = { s, length };
266    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
267}
268
269PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
270{
271    ASSERT(s);
272    ASSERT(existingHash);
273
274    if (!length)
275        return StringImpl::empty();
276
277    HashAndCharacters<UChar> buffer = { existingHash, s, length };
278    return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
279}
280
281PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
282{
283    if (!s)
284        return nullptr;
285
286    unsigned length = 0;
287    while (s[length] != UChar(0))
288        ++length;
289
290    if (!length)
291        return StringImpl::empty();
292
293    UCharBuffer buffer = { s, length };
294    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
295}
296
297struct SubstringLocation {
298    StringImpl* baseString;
299    unsigned start;
300    unsigned length;
301};
302
303struct SubstringTranslator {
304    static unsigned hash(const SubstringLocation& buffer)
305    {
306        if (buffer.baseString->is8Bit())
307            return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
308        return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
309    }
310
311    static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
312    {
313        if (buffer.baseString->is8Bit())
314            return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
315        return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
316    }
317
318    static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
319    {
320        location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
321        location->setHash(hash);
322        location->setIsAtomic(true);
323    }
324};
325
326PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
327{
328    if (!baseString)
329        return nullptr;
330
331    if (!length || start >= baseString->length())
332        return StringImpl::empty();
333
334    unsigned maxLength = baseString->length() - start;
335    if (length >= maxLength) {
336        if (!start)
337            return add(baseString);
338        length = maxLength;
339    }
340
341    SubstringLocation buffer = { baseString, start, length };
342    return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
343}
344
345typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
346struct LCharBufferTranslator {
347    static unsigned hash(const LCharBuffer& buf)
348    {
349        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
350    }
351
352    static bool equal(StringImpl* const& str, const LCharBuffer& buf)
353    {
354        return WTF::equal(str, buf.s, buf.length);
355    }
356
357    static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
358    {
359        location = StringImpl::create(buf.s, buf.length).leakRef();
360        location->setHash(hash);
361        location->setIsAtomic(true);
362    }
363};
364
365typedef HashTranslatorCharBuffer<char> CharBuffer;
366struct CharBufferFromLiteralDataTranslator {
367    static unsigned hash(const CharBuffer& buf)
368    {
369        return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
370    }
371
372    static bool equal(StringImpl* const& str, const CharBuffer& buf)
373    {
374        return WTF::equal(str, buf.s, buf.length);
375    }
376
377    static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
378    {
379        location = StringImpl::create(buf.s, buf.length).leakRef();
380        location->setHash(hash);
381        location->setIsAtomic(true);
382    }
383};
384
385PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
386{
387    if (!s)
388        return nullptr;
389
390    if (!length)
391        return StringImpl::empty();
392
393    LCharBuffer buffer = { s, length };
394    return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
395}
396
397PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
398{
399    ASSERT(characters);
400    ASSERT(length);
401
402    CharBuffer buffer = { characters, length };
403    return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
404}
405
406PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* string)
407{
408    return atomicStringTable().addStringImpl(string);
409}
410
411template<typename CharacterType>
412static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
413{
414    HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
415    return atomicStrings().find<HashAndCharactersTranslator<CharacterType> >(buffer);
416}
417
418StringImpl* AtomicString::find(const StringImpl* stringImpl)
419{
420    ASSERT(stringImpl);
421    ASSERT(stringImpl->existingHash());
422
423    if (!stringImpl->length())
424        return StringImpl::empty();
425
426    HashSet<StringImpl*>::iterator iterator;
427    if (stringImpl->is8Bit())
428        iterator = findString<LChar>(stringImpl);
429    else
430        iterator = findString<UChar>(stringImpl);
431    if (iterator == atomicStrings().end())
432        return 0;
433    return *iterator;
434}
435
436void AtomicString::remove(StringImpl* r)
437{
438    HashSet<StringImpl*>::iterator iterator;
439    if (r->is8Bit())
440        iterator = findString<LChar>(r);
441    else
442        iterator = findString<UChar>(r);
443    RELEASE_ASSERT(iterator != atomicStrings().end());
444    atomicStrings().remove(iterator);
445}
446
447AtomicString AtomicString::lower() const
448{
449    // Note: This is a hot function in the Dromaeo benchmark.
450    StringImpl* impl = this->impl();
451    if (UNLIKELY(!impl))
452        return *this;
453    RefPtr<StringImpl> newImpl = impl->lower();
454    if (LIKELY(newImpl == impl))
455        return *this;
456    return AtomicString(newImpl.release());
457}
458
459AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
460{
461    HashAndUTF8Characters buffer;
462    buffer.characters = charactersStart;
463    buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
464
465    if (!buffer.hash)
466        return nullAtom;
467
468    AtomicString atomicString;
469    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
470    return atomicString;
471}
472
473AtomicString AtomicString::number(int number)
474{
475    return numberToStringSigned<AtomicString>(number);
476}
477
478AtomicString AtomicString::number(unsigned number)
479{
480    return numberToStringUnsigned<AtomicString>(number);
481}
482
483AtomicString AtomicString::number(long number)
484{
485    return numberToStringSigned<AtomicString>(number);
486}
487
488AtomicString AtomicString::number(unsigned long number)
489{
490    return numberToStringUnsigned<AtomicString>(number);
491}
492
493AtomicString AtomicString::number(long long number)
494{
495    return numberToStringSigned<AtomicString>(number);
496}
497
498AtomicString AtomicString::number(unsigned long long number)
499{
500    return numberToStringUnsigned<AtomicString>(number);
501}
502
503AtomicString AtomicString::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
504{
505    NumberToStringBuffer buffer;
506    return AtomicString(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros));
507}
508
509#ifndef NDEBUG
510void AtomicString::show() const
511{
512    m_string.show();
513}
514#endif
515
516} // namespace WTF
517