1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 * Copyright (C) 2012 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB.  If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "AtomicString.h"
25
26#include "StringHash.h"
27#include "wtf/HashSet.h"
28#include "wtf/WTFThreadData.h"
29#include "wtf/unicode/UTF8.h"
30
31namespace WTF {
32
33using namespace Unicode;
34
35COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
36
37class AtomicStringTable {
38public:
39    static AtomicStringTable* create(WTFThreadData& data)
40    {
41        data.m_atomicStringTable = new AtomicStringTable;
42        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
43        return data.m_atomicStringTable;
44    }
45
46    HashSet<StringImpl*>& table()
47    {
48        return m_table;
49    }
50
51private:
52    static void destroy(AtomicStringTable* table)
53    {
54        HashSet<StringImpl*>::iterator end = table->m_table.end();
55        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
56            (*iter)->setIsAtomic(false);
57        delete table;
58    }
59
60    HashSet<StringImpl*> m_table;
61};
62
63static inline HashSet<StringImpl*>& stringTable()
64{
65    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
66    WTFThreadData& data = wtfThreadData();
67    AtomicStringTable* table = data.atomicStringTable();
68    if (UNLIKELY(!table))
69        table = AtomicStringTable::create(data);
70    return table->table();
71}
72
73template<typename T, typename HashTranslator>
74static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
75{
76    HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value);
77
78    // If the string is newly-translated, then we need to adopt it.
79    // The boolean in the pair tells us if that is so.
80    return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
81}
82
83struct CStringTranslator {
84    static unsigned hash(const LChar* c)
85    {
86        return StringHasher::computeHashAndMaskTop8Bits(c);
87    }
88
89    static inline bool equal(StringImpl* r, const LChar* s)
90    {
91        return WTF::equal(r, s);
92    }
93
94    static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
95    {
96        location = StringImpl::create(c).leakRef();
97        location->setHash(hash);
98        location->setIsAtomic(true);
99    }
100};
101
102PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
103{
104    if (!c)
105        return 0;
106    if (!*c)
107        return StringImpl::empty();
108
109    return addToStringTable<const LChar*, CStringTranslator>(c);
110}
111
// Lookup key for the buffer-based translators: a pointer to characters plus
// an explicit character count.
template<class CharacterType>
struct HashTranslatorCharBuffer {
    // First character of the buffer.
    const CharacterType* s;
    // Number of characters readable from |s|.
    unsigned length;
};
117
118typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
119struct UCharBufferTranslator {
120    static unsigned hash(const UCharBuffer& buf)
121    {
122        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
123    }
124
125    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
126    {
127        return WTF::equal(str, buf.s, buf.length);
128    }
129
130    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
131    {
132        location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
133        location->setHash(hash);
134        location->setIsAtomic(true);
135    }
136};
137
// Lookup key carrying an already-computed hash together with the characters
// it was computed from.
template<class CharacterType>
struct HashAndCharacters {
    // Precomputed hash of |characters|.
    unsigned hash;
    // First character of the run.
    const CharacterType* characters;
    // Number of characters in the run.
    unsigned length;
};
144
145template<typename CharacterType>
146struct HashAndCharactersTranslator {
147    static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
148    {
149        ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
150        return buffer.hash;
151    }
152
153    static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
154    {
155        return WTF::equal(string, buffer.characters, buffer.length);
156    }
157
158    static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
159    {
160        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
161        location->setHash(hash);
162        location->setIsAtomic(true);
163    }
164};
165
// Lookup key for UTF-8 input: the precomputed hash, the raw UTF-8 bytes, and
// the length of the text in both encodings.
struct HashAndUTF8Characters {
    // Precomputed hash for the text.
    unsigned hash;
    // Start of the UTF-8 data.
    const char* characters;
    // Length of the UTF-8 data.
    unsigned length;
    // Length the text has once expressed as UTF-16.
    unsigned utf16Length;
};
172
173struct HashAndUTF8CharactersTranslator {
174    static unsigned hash(const HashAndUTF8Characters& buffer)
175    {
176        return buffer.hash;
177    }
178
179    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
180    {
181        if (buffer.utf16Length != string->length())
182            return false;
183
184        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
185        if (buffer.utf16Length != buffer.length) {
186            if (string->is8Bit()) {
187                const LChar* characters8 = string->characters8();
188                return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
189            }
190            const UChar* characters16 = string->characters16();
191            return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
192        }
193
194        if (string->is8Bit()) {
195            const LChar* stringCharacters = string->characters8();
196
197            for (unsigned i = 0; i < buffer.length; ++i) {
198                ASSERT(isASCII(buffer.characters[i]));
199                if (stringCharacters[i] != buffer.characters[i])
200                    return false;
201            }
202
203            return true;
204        }
205
206        const UChar* stringCharacters = string->characters16();
207
208        for (unsigned i = 0; i < buffer.length; ++i) {
209            ASSERT(isASCII(buffer.characters[i]));
210            if (stringCharacters[i] != buffer.characters[i])
211                return false;
212        }
213
214        return true;
215    }
216
217    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
218    {
219        UChar* target;
220        RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
221
222        bool isAllASCII;
223        const char* source = buffer.characters;
224        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
225            ASSERT_NOT_REACHED();
226
227        if (isAllASCII)
228            newString = StringImpl::create(buffer.characters, buffer.length);
229
230        location = newString.release().leakRef();
231        location->setHash(hash);
232        location->setIsAtomic(true);
233    }
234};
235
236PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
237{
238    if (!s)
239        return 0;
240
241    if (!length)
242        return StringImpl::empty();
243
244    UCharBuffer buffer = { s, length };
245    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
246}
247
248PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
249{
250    ASSERT(s);
251    ASSERT(existingHash);
252
253    if (!length)
254        return StringImpl::empty();
255
256    HashAndCharacters<UChar> buffer = { existingHash, s, length };
257    return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
258}
259
260PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
261{
262    if (!s)
263        return 0;
264
265    unsigned length = 0;
266    while (s[length] != UChar(0))
267        ++length;
268
269    if (!length)
270        return StringImpl::empty();
271
272    UCharBuffer buffer = { s, length };
273    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
274}
275
276struct SubstringLocation {
277    StringImpl* baseString;
278    unsigned start;
279    unsigned length;
280};
281
282struct SubstringTranslator {
283    static unsigned hash(const SubstringLocation& buffer)
284    {
285        if (buffer.baseString->is8Bit())
286            return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
287        return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
288    }
289
290    static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
291    {
292        if (buffer.baseString->is8Bit())
293            return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
294        return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
295    }
296
297    static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
298    {
299        location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
300        location->setHash(hash);
301        location->setIsAtomic(true);
302    }
303};
304
305PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
306{
307    if (!baseString)
308        return 0;
309
310    if (!length || start >= baseString->length())
311        return StringImpl::empty();
312
313    unsigned maxLength = baseString->length() - start;
314    if (length >= maxLength) {
315        if (!start)
316            return add(baseString);
317        length = maxLength;
318    }
319
320    SubstringLocation buffer = { baseString, start, length };
321    return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
322}
323
324typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
325struct LCharBufferTranslator {
326    static unsigned hash(const LCharBuffer& buf)
327    {
328        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
329    }
330
331    static bool equal(StringImpl* const& str, const LCharBuffer& buf)
332    {
333        return WTF::equal(str, buf.s, buf.length);
334    }
335
336    static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
337    {
338        location = StringImpl::create(buf.s, buf.length).leakRef();
339        location->setHash(hash);
340        location->setIsAtomic(true);
341    }
342};
343
344typedef HashTranslatorCharBuffer<char> CharBuffer;
345struct CharBufferFromLiteralDataTranslator {
346    static unsigned hash(const CharBuffer& buf)
347    {
348        return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
349    }
350
351    static bool equal(StringImpl* const& str, const CharBuffer& buf)
352    {
353        return WTF::equal(str, buf.s, buf.length);
354    }
355
356    static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
357    {
358        location = StringImpl::create(buf.s, buf.length).leakRef();
359        location->setHash(hash);
360        location->setIsAtomic(true);
361    }
362};
363
364PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
365{
366    if (!s)
367        return 0;
368
369    if (!length)
370        return StringImpl::empty();
371
372    LCharBuffer buffer = { s, length };
373    return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
374}
375
376PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
377{
378    ASSERT(characters);
379    ASSERT(length);
380
381    CharBuffer buffer = { characters, length };
382    return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
383}
384
385PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
386{
387    if (!r->length())
388        return StringImpl::empty();
389
390    StringImpl* result = *stringTable().add(r).iterator;
391    if (result == r)
392        r->setIsAtomic(true);
393    ASSERT(!r->isStatic() || result->isStatic());
394    return result;
395}
396
397template<typename CharacterType>
398static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
399{
400    HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
401    return stringTable().find<HashAndCharactersTranslator<CharacterType> >(buffer);
402}
403
404StringImpl* AtomicString::find(const StringImpl* stringImpl)
405{
406    ASSERT(stringImpl);
407    ASSERT(stringImpl->existingHash());
408
409    if (!stringImpl->length())
410        return StringImpl::empty();
411
412    HashSet<StringImpl*>::iterator iterator;
413    if (stringImpl->is8Bit())
414        iterator = findString<LChar>(stringImpl);
415    else
416        iterator = findString<UChar>(stringImpl);
417    if (iterator == stringTable().end())
418        return 0;
419    return *iterator;
420}
421
422void AtomicString::remove(StringImpl* r)
423{
424    HashSet<StringImpl*>::iterator iterator;
425    if (r->is8Bit())
426        iterator = findString<LChar>(r);
427    else
428        iterator = findString<UChar>(r);
429    RELEASE_ASSERT(iterator != stringTable().end());
430    stringTable().remove(iterator);
431}
432
433AtomicString AtomicString::lower() const
434{
435    // Note: This is a hot function in the Dromaeo benchmark.
436    StringImpl* impl = this->impl();
437    if (UNLIKELY(!impl))
438        return *this;
439    RefPtr<StringImpl> newImpl = impl->lower();
440    if (LIKELY(newImpl == impl))
441        return *this;
442    return AtomicString(newImpl);
443}
444
445AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
446{
447    HashAndUTF8Characters buffer;
448    buffer.characters = charactersStart;
449    buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
450
451    if (!buffer.hash)
452        return nullAtom;
453
454    AtomicString atomicString;
455    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
456    return atomicString;
457}
458
459#ifndef NDEBUG
460void AtomicString::show() const
461{
462    m_string.show();
463}
464#endif
465
466} // namespace WTF
467