1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
#include "config.h"

#include "AtomicString.h"

#include "StringHash.h"
#include <string.h>
#include <wtf/HashSet.h>
#include <wtf/Threading.h>
#include <wtf/WTFThreadData.h>
#include <wtf/unicode/UTF8.h>
31
32namespace WTF {
33
// Bring the names of the Unicode namespace into scope for the rest of this file.
using namespace Unicode;

// Break the build if AtomicString and String ever stop having the same size.
COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
37
38class AtomicStringTable {
39public:
40    static AtomicStringTable* create()
41    {
42        AtomicStringTable* table = new AtomicStringTable;
43
44        WTFThreadData& data = wtfThreadData();
45        data.m_atomicStringTable = table;
46        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
47
48        return table;
49    }
50
51    HashSet<StringImpl*>& table()
52    {
53        return m_table;
54    }
55
56private:
57    static void destroy(AtomicStringTable* table)
58    {
59        HashSet<StringImpl*>::iterator end = table->m_table.end();
60        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
61            (*iter)->setIsAtomic(false);
62        delete table;
63    }
64
65    HashSet<StringImpl*> m_table;
66};
67
68static inline HashSet<StringImpl*>& stringTable()
69{
70    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
71    AtomicStringTable* table = wtfThreadData().atomicStringTable();
72    if (UNLIKELY(!table))
73        table = AtomicStringTable::create();
74    return table->table();
75}
76
77template<typename T, typename HashTranslator>
78static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
79{
80    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value);
81
82    // If the string is newly-translated, then we need to adopt it.
83    // The boolean in the pair tells us if that is so.
84    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
85}
86
87struct CStringTranslator {
88    static unsigned hash(const char* c)
89    {
90        return StringHasher::computeHash(c);
91    }
92
93    static bool equal(StringImpl* r, const char* s)
94    {
95        int length = r->length();
96        const UChar* d = r->characters();
97        for (int i = 0; i != length; ++i) {
98            unsigned char c = s[i];
99            if (d[i] != c)
100                return false;
101        }
102        return !s[length];
103    }
104
105    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
106    {
107        location = StringImpl::create(c).leakRef();
108        location->setHash(hash);
109        location->setIsAtomic(true);
110    }
111};
112
113bool operator==(const AtomicString& a, const char* b)
114{
115    StringImpl* impl = a.impl();
116    if ((!impl || !impl->characters()) && !b)
117        return true;
118    if ((!impl || !impl->characters()) || !b)
119        return false;
120    return CStringTranslator::equal(impl, b);
121}
122
123PassRefPtr<StringImpl> AtomicString::add(const char* c)
124{
125    if (!c)
126        return 0;
127    if (!*c)
128        return StringImpl::empty();
129
130    return addToStringTable<const char*, CStringTranslator>(c);
131}
132
133struct UCharBuffer {
134    const UChar* s;
135    unsigned length;
136};
137
138static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
139{
140    if (string->length() != length)
141        return false;
142
143    // FIXME: perhaps we should have a more abstract macro that indicates when
144    // going 4 bytes at a time is unsafe
145#if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC)
146    const UChar* stringCharacters = string->characters();
147    for (unsigned i = 0; i != length; ++i) {
148        if (*stringCharacters++ != *characters++)
149            return false;
150    }
151    return true;
152#else
153    /* Do it 4-bytes-at-a-time on architectures where it's safe */
154
155    const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
156    const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
157
158    unsigned halfLength = length >> 1;
159    for (unsigned i = 0; i != halfLength; ++i) {
160        if (*stringCharacters++ != *bufferCharacters++)
161            return false;
162    }
163
164    if (length & 1 &&  *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
165        return false;
166
167    return true;
168#endif
169}
170
171bool operator==(const AtomicString& string, const Vector<UChar>& vector)
172{
173    return string.impl() && equal(string.impl(), vector.data(), vector.size());
174}
175
176struct UCharBufferTranslator {
177    static unsigned hash(const UCharBuffer& buf)
178    {
179        return StringHasher::computeHash(buf.s, buf.length);
180    }
181
182    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
183    {
184        return WTF::equal(str, buf.s, buf.length);
185    }
186
187    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
188    {
189        location = StringImpl::create(buf.s, buf.length).leakRef();
190        location->setHash(hash);
191        location->setIsAtomic(true);
192    }
193};
194
195struct HashAndCharacters {
196    unsigned hash;
197    const UChar* characters;
198    unsigned length;
199};
200
201struct HashAndCharactersTranslator {
202    static unsigned hash(const HashAndCharacters& buffer)
203    {
204        ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
205        return buffer.hash;
206    }
207
208    static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
209    {
210        return WTF::equal(string, buffer.characters, buffer.length);
211    }
212
213    static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
214    {
215        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
216        location->setHash(hash);
217        location->setIsAtomic(true);
218    }
219};
220
// Lookup key for HashAndUTF8CharactersTranslator: UTF-8 bytes plus the hash
// and both lengths computed for them beforehand.
struct HashAndUTF8Characters {
    // Precomputed hash for the string.
    unsigned hash;
    // First byte of the UTF-8 data.
    const char* characters;
    // Length of the UTF-8 data in bytes.
    unsigned length;
    // Length the same text has in UTF-16 code units.
    unsigned utf16Length;
};
227
228struct HashAndUTF8CharactersTranslator {
229    static unsigned hash(const HashAndUTF8Characters& buffer)
230    {
231        return buffer.hash;
232    }
233
234    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
235    {
236        if (buffer.utf16Length != string->length())
237            return false;
238
239        const UChar* stringCharacters = string->characters();
240
241        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
242        if (buffer.utf16Length != buffer.length)
243            return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
244
245        for (unsigned i = 0; i < buffer.length; ++i) {
246            ASSERT(isASCII(buffer.characters[i]));
247            if (stringCharacters[i] != buffer.characters[i])
248                return false;
249        }
250
251        return true;
252    }
253
254    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
255    {
256        UChar* target;
257        location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
258
259        const char* source = buffer.characters;
260        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
261            ASSERT_NOT_REACHED();
262
263        location->setHash(hash);
264        location->setIsAtomic(true);
265    }
266};
267
268PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
269{
270    if (!s)
271        return 0;
272
273    if (!length)
274        return StringImpl::empty();
275
276    UCharBuffer buffer = { s, length };
277    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
278}
279
280PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
281{
282    ASSERT(s);
283    ASSERT(existingHash);
284
285    if (!length)
286        return StringImpl::empty();
287
288    HashAndCharacters buffer = { existingHash, s, length };
289    return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer);
290}
291
292PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
293{
294    if (!s)
295        return 0;
296
297    int length = 0;
298    while (s[length] != UChar(0))
299        length++;
300
301    if (!length)
302        return StringImpl::empty();
303
304    UCharBuffer buffer = { s, length };
305    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
306}
307
308PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
309{
310    if (!r || r->isAtomic())
311        return r;
312
313    if (!r->length())
314        return StringImpl::empty();
315
316    StringImpl* result = *stringTable().add(r).first;
317    if (result == r)
318        r->setIsAtomic(true);
319    return result;
320}
321
322AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
323{
324    ASSERT(s);
325    ASSERT(existingHash);
326
327    if (!length)
328        return static_cast<AtomicStringImpl*>(StringImpl::empty());
329
330    HashAndCharacters buffer = { existingHash, s, length };
331    HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
332    if (iterator == stringTable().end())
333        return 0;
334    return static_cast<AtomicStringImpl*>(*iterator);
335}
336
337void AtomicString::remove(StringImpl* r)
338{
339    stringTable().remove(r);
340}
341
342AtomicString AtomicString::lower() const
343{
344    // Note: This is a hot function in the Dromaeo benchmark.
345    StringImpl* impl = this->impl();
346    if (UNLIKELY(!impl))
347        return *this;
348    RefPtr<StringImpl> newImpl = impl->lower();
349    if (LIKELY(newImpl == impl))
350        return *this;
351    return AtomicString(newImpl);
352}
353
354AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
355{
356    HashAndUTF8Characters buffer;
357    buffer.characters = charactersStart;
358    buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
359
360    if (!buffer.hash)
361        return nullAtom;
362
363    AtomicString atomicString;
364    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
365    return atomicString;
366}
367
368} // namespace WTF
369