AtomicString.cpp revision cad810f21b803229eb11403f9209855525a25d57
1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#include "config.h"
23
24#include "AtomicString.h"
25
26#include "StringHash.h"
27#include <wtf/HashSet.h>
28#include <wtf/Threading.h>
29#include <wtf/WTFThreadData.h>
30#include <wtf/unicode/UTF8.h>
31
32namespace WTF {
33
34using namespace Unicode;
35
36COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
37
38class AtomicStringTable {
39public:
40    static AtomicStringTable* create()
41    {
42        AtomicStringTable* table = new AtomicStringTable;
43
44        WTFThreadData& data = wtfThreadData();
45        data.m_atomicStringTable = table;
46        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
47
48        return table;
49    }
50
51    HashSet<StringImpl*>& table()
52    {
53        return m_table;
54    }
55
56private:
57    static void destroy(AtomicStringTable* table)
58    {
59        HashSet<StringImpl*>::iterator end = table->m_table.end();
60        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
61            (*iter)->setIsAtomic(false);
62        delete table;
63    }
64
65    HashSet<StringImpl*> m_table;
66};
67
68static inline HashSet<StringImpl*>& stringTable()
69{
70    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
71    AtomicStringTable* table = wtfThreadData().atomicStringTable();
72    if (UNLIKELY(!table))
73        table = AtomicStringTable::create();
74    return table->table();
75}
76
77struct CStringTranslator {
78    static unsigned hash(const char* c)
79    {
80        return StringImpl::computeHash(c);
81    }
82
83    static bool equal(StringImpl* r, const char* s)
84    {
85        int length = r->length();
86        const UChar* d = r->characters();
87        for (int i = 0; i != length; ++i) {
88            unsigned char c = s[i];
89            if (d[i] != c)
90                return false;
91        }
92        return !s[length];
93    }
94
95    static void translate(StringImpl*& location, const char* const& c, unsigned hash)
96    {
97        location = StringImpl::create(c).leakRef();
98        location->setHash(hash);
99        location->setIsAtomic(true);
100    }
101};
102
103bool operator==(const AtomicString& a, const char* b)
104{
105    StringImpl* impl = a.impl();
106    if ((!impl || !impl->characters()) && !b)
107        return true;
108    if ((!impl || !impl->characters()) || !b)
109        return false;
110    return CStringTranslator::equal(impl, b);
111}
112
113PassRefPtr<StringImpl> AtomicString::add(const char* c)
114{
115    if (!c)
116        return 0;
117    if (!*c)
118        return StringImpl::empty();
119    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<const char*, CStringTranslator>(c);
120    if (!addResult.second)
121        return *addResult.first;
122    return adoptRef(*addResult.first);
123}
124
125struct UCharBuffer {
126    const UChar* s;
127    unsigned length;
128};
129
130static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
131{
132    if (string->length() != length)
133        return false;
134
135    // FIXME: perhaps we should have a more abstract macro that indicates when
136    // going 4 bytes at a time is unsafe
137#if CPU(ARM) || CPU(SH4) || CPU(MIPS)
138    const UChar* stringCharacters = string->characters();
139    for (unsigned i = 0; i != length; ++i) {
140        if (*stringCharacters++ != *characters++)
141            return false;
142    }
143    return true;
144#else
145    /* Do it 4-bytes-at-a-time on architectures where it's safe */
146
147    const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
148    const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
149
150    unsigned halfLength = length >> 1;
151    for (unsigned i = 0; i != halfLength; ++i) {
152        if (*stringCharacters++ != *bufferCharacters++)
153            return false;
154    }
155
156    if (length & 1 &&  *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
157        return false;
158
159    return true;
160#endif
161}
162
163bool operator==(const AtomicString& string, const Vector<UChar>& vector)
164{
165    return string.impl() && equal(string.impl(), vector.data(), vector.size());
166}
167
168struct UCharBufferTranslator {
169    static unsigned hash(const UCharBuffer& buf)
170    {
171        return StringImpl::computeHash(buf.s, buf.length);
172    }
173
174    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
175    {
176        return WTF::equal(str, buf.s, buf.length);
177    }
178
179    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
180    {
181        location = StringImpl::create(buf.s, buf.length).leakRef();
182        location->setHash(hash);
183        location->setIsAtomic(true);
184    }
185};
186
187struct HashAndCharacters {
188    unsigned hash;
189    const UChar* characters;
190    unsigned length;
191};
192
193struct HashAndCharactersTranslator {
194    static unsigned hash(const HashAndCharacters& buffer)
195    {
196        ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length));
197        return buffer.hash;
198    }
199
200    static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
201    {
202        return WTF::equal(string, buffer.characters, buffer.length);
203    }
204
205    static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
206    {
207        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
208        location->setHash(hash);
209        location->setIsAtomic(true);
210    }
211};
212
// Lookup key for a UTF-8 byte sequence with a precomputed hash; consumed by
// HashAndUTF8CharactersTranslator.
struct HashAndUTF8Characters {
    // Hash supplied by the caller; the translator returns it unchanged.
    unsigned hash;
    // First byte of the UTF-8 data (not owned).
    const char* characters;
    // Number of bytes of UTF-8 data.
    unsigned length;
    // Number of UTF-16 code units the data occupies once converted.
    unsigned utf16Length;
};
219
220struct HashAndUTF8CharactersTranslator {
221    static unsigned hash(const HashAndUTF8Characters& buffer)
222    {
223        return buffer.hash;
224    }
225
226    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
227    {
228        if (buffer.utf16Length != string->length())
229            return false;
230
231        const UChar* stringCharacters = string->characters();
232
233        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
234        if (buffer.utf16Length != buffer.length)
235            return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
236
237        for (unsigned i = 0; i < buffer.length; ++i) {
238            ASSERT(isASCII(buffer.characters[i]));
239            if (stringCharacters[i] != buffer.characters[i])
240                return false;
241        }
242
243        return true;
244    }
245
246    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
247    {
248        UChar* target;
249        location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
250
251        const char* source = buffer.characters;
252        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
253            ASSERT_NOT_REACHED();
254
255        location->setHash(hash);
256        location->setIsAtomic(true);
257    }
258};
259
260PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
261{
262    if (!s)
263        return 0;
264
265    if (!length)
266        return StringImpl::empty();
267
268    UCharBuffer buf = { s, length };
269    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf);
270
271    // If the string is newly-translated, then we need to adopt it.
272    // The boolean in the pair tells us if that is so.
273    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
274}
275
276PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
277{
278    ASSERT(s);
279    ASSERT(existingHash);
280
281    if (!length)
282        return StringImpl::empty();
283
284    HashAndCharacters buffer = { existingHash, s, length };
285    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(buffer);
286    if (!addResult.second)
287        return *addResult.first;
288    return adoptRef(*addResult.first);
289}
290
291PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
292{
293    if (!s)
294        return 0;
295
296    int length = 0;
297    while (s[length] != UChar(0))
298        length++;
299
300    if (!length)
301        return StringImpl::empty();
302
303    UCharBuffer buf = {s, length};
304    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf);
305
306    // If the string is newly-translated, then we need to adopt it.
307    // The boolean in the pair tells us if that is so.
308    return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
309}
310
311PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
312{
313    if (!r || r->isAtomic())
314        return r;
315
316    if (!r->length())
317        return StringImpl::empty();
318
319    StringImpl* result = *stringTable().add(r).first;
320    if (result == r)
321        r->setIsAtomic(true);
322    return result;
323}
324
325AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
326{
327    ASSERT(s);
328    ASSERT(existingHash);
329
330    if (!length)
331        return static_cast<AtomicStringImpl*>(StringImpl::empty());
332
333    HashAndCharacters buffer = { existingHash, s, length };
334    HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
335    if (iterator == stringTable().end())
336        return 0;
337    return static_cast<AtomicStringImpl*>(*iterator);
338}
339
340void AtomicString::remove(StringImpl* r)
341{
342    stringTable().remove(r);
343}
344
345AtomicString AtomicString::lower() const
346{
347    // Note: This is a hot function in the Dromaeo benchmark.
348    StringImpl* impl = this->impl();
349    if (UNLIKELY(!impl))
350        return *this;
351    RefPtr<StringImpl> newImpl = impl->lower();
352    if (LIKELY(newImpl == impl))
353        return *this;
354    return AtomicString(newImpl);
355}
356
357AtomicString AtomicString::fromUTF8(const char* characters, size_t length)
358{
359    if (!characters)
360        return AtomicString();
361
362    if (!length)
363        return emptyAtom;
364
365    HashAndUTF8Characters buffer;
366    buffer.characters = characters;
367    buffer.length = length;
368    buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length);
369
370    if (!buffer.hash)
371        return AtomicString();
372
373    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
374
375    // If the string is newly-translated, then we need to adopt it.
376    // The boolean in the pair tells us if that is so.
377    AtomicString atomicString;
378    atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first;
379    return atomicString;
380}
381
382AtomicString AtomicString::fromUTF8(const char* characters)
383{
384    if (!characters)
385        return AtomicString();
386    return fromUTF8(characters, strlen(characters));
387}
388
389} // namespace WTF
390