1/* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22#include "config.h" 23 24#include "AtomicString.h" 25 26#include "StringHash.h" 27#include <wtf/HashSet.h> 28#include <wtf/Threading.h> 29#include <wtf/WTFThreadData.h> 30#include <wtf/unicode/UTF8.h> 31 32namespace WTF { 33 34using namespace Unicode; 35 36COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); 37 38class AtomicStringTable { 39public: 40 static AtomicStringTable* create() 41 { 42 AtomicStringTable* table = new AtomicStringTable; 43 44 WTFThreadData& data = wtfThreadData(); 45 data.m_atomicStringTable = table; 46 data.m_atomicStringTableDestructor = AtomicStringTable::destroy; 47 48 return table; 49 } 50 51 HashSet<StringImpl*>& table() 52 { 53 return m_table; 54 } 55 56private: 57 static void destroy(AtomicStringTable* table) 58 { 59 HashSet<StringImpl*>::iterator end = table->m_table.end(); 60 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) 61 (*iter)->setIsAtomic(false); 62 delete table; 63 } 64 65 HashSet<StringImpl*> m_table; 66}; 67 68static inline HashSet<StringImpl*>& stringTable() 69{ 70 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). 71 AtomicStringTable* table = wtfThreadData().atomicStringTable(); 72 if (UNLIKELY(!table)) 73 table = AtomicStringTable::create(); 74 return table->table(); 75} 76 77template<typename T, typename HashTranslator> 78static inline PassRefPtr<StringImpl> addToStringTable(const T& value) 79{ 80 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value); 81 82 // If the string is newly-translated, then we need to adopt it. 83 // The boolean in the pair tells us if that is so. 84 return addResult.second ? adoptRef(*addResult.first) : *addResult.first; 85} 86 87struct CStringTranslator { 88 static unsigned hash(const char* c) 89 { 90 return StringHasher::computeHash(c); 91 } 92 93 static bool equal(StringImpl* r, const char* s) 94 { 95 int length = r->length(); 96 const UChar* d = r->characters(); 97 for (int i = 0; i != length; ++i) { 98 unsigned char c = s[i]; 99 if (d[i] != c) 100 return false; 101 } 102 return !s[length]; 103 } 104 105 static void translate(StringImpl*& location, const char* const& c, unsigned hash) 106 { 107 location = StringImpl::create(c).leakRef(); 108 location->setHash(hash); 109 location->setIsAtomic(true); 110 } 111}; 112 113bool operator==(const AtomicString& a, const char* b) 114{ 115 StringImpl* impl = a.impl(); 116 if ((!impl || !impl->characters()) && !b) 117 return true; 118 if ((!impl || !impl->characters()) || !b) 119 return false; 120 return CStringTranslator::equal(impl, b); 121} 122 123PassRefPtr<StringImpl> AtomicString::add(const char* c) 124{ 125 if (!c) 126 return 0; 127 if (!*c) 128 return StringImpl::empty(); 129 130 return addToStringTable<const char*, CStringTranslator>(c); 131} 132 133struct UCharBuffer { 134 const UChar* s; 135 unsigned length; 136}; 137 138static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) 139{ 140 if (string->length() != length) 141 return false; 142 143 // FIXME: perhaps we should have a more abstract macro that indicates when 144 // going 4 bytes at a time is unsafe 145#if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC) 146 const UChar* stringCharacters = string->characters(); 147 for (unsigned i = 0; i != length; ++i) { 148 if (*stringCharacters++ != *characters++) 149 return false; 150 } 151 return true; 152#else 153 /* Do it 4-bytes-at-a-time on architectures where it's safe */ 154 155 const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters()); 156 const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters); 157 158 unsigned halfLength = length >> 1; 159 for (unsigned i = 0; i != halfLength; ++i) { 160 if (*stringCharacters++ != *bufferCharacters++) 161 return false; 162 } 163 164 if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) 165 return false; 166 167 return true; 168#endif 169} 170 171bool operator==(const AtomicString& string, const Vector<UChar>& vector) 172{ 173 return string.impl() && equal(string.impl(), vector.data(), vector.size()); 174} 175 176struct UCharBufferTranslator { 177 static unsigned hash(const UCharBuffer& buf) 178 { 179 return StringHasher::computeHash(buf.s, buf.length); 180 } 181 182 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 183 { 184 return WTF::equal(str, buf.s, buf.length); 185 } 186 187 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 188 { 189 location = StringImpl::create(buf.s, buf.length).leakRef(); 190 location->setHash(hash); 191 location->setIsAtomic(true); 192 } 193}; 194 195struct HashAndCharacters { 196 unsigned hash; 197 const UChar* characters; 198 unsigned length; 199}; 200 201struct HashAndCharactersTranslator { 202 static unsigned hash(const HashAndCharacters& buffer) 203 { 204 ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length)); 205 return buffer.hash; 206 } 207 208 static bool equal(StringImpl* const& string, const HashAndCharacters& buffer) 209 { 210 return WTF::equal(string, buffer.characters, buffer.length); 211 } 212 213 static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash) 214 { 215 location = StringImpl::create(buffer.characters, buffer.length).leakRef(); 216 location->setHash(hash); 217 location->setIsAtomic(true); 218 } 219}; 220 221struct HashAndUTF8Characters { 222 unsigned hash; 223 const char* characters; 224 unsigned length; 225 unsigned utf16Length; 226}; 227 228struct HashAndUTF8CharactersTranslator { 229 static unsigned hash(const HashAndUTF8Characters& buffer) 230 { 231 return buffer.hash; 232 } 233 234 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 235 { 236 if (buffer.utf16Length != string->length()) 237 return false; 238 239 const UChar* stringCharacters = string->characters(); 240 241 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 242 if (buffer.utf16Length != buffer.length) 243 return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length); 244 245 for (unsigned i = 0; i < buffer.length; ++i) { 246 ASSERT(isASCII(buffer.characters[i])); 247 if (stringCharacters[i] != buffer.characters[i]) 248 return false; 249 } 250 251 return true; 252 } 253 254 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 255 { 256 UChar* target; 257 location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef(); 258 259 const char* source = buffer.characters; 260 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK) 261 ASSERT_NOT_REACHED(); 262 263 location->setHash(hash); 264 location->setIsAtomic(true); 265 } 266}; 267 268PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 269{ 270 if (!s) 271 return 0; 272 273 if (!length) 274 return StringImpl::empty(); 275 276 UCharBuffer buffer = { s, length }; 277 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 278} 279 280PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 281{ 282 ASSERT(s); 283 ASSERT(existingHash); 284 285 if (!length) 286 return StringImpl::empty(); 287 288 HashAndCharacters buffer = { existingHash, s, length }; 289 return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer); 290} 291 292PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 293{ 294 if (!s) 295 return 0; 296 297 int length = 0; 298 while (s[length] != UChar(0)) 299 length++; 300 301 if (!length) 302 return StringImpl::empty(); 303 304 UCharBuffer buffer = { s, length }; 305 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer); 306} 307 308PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r) 309{ 310 if (!r || r->isAtomic()) 311 return r; 312 313 if (!r->length()) 314 return StringImpl::empty(); 315 316 StringImpl* result = *stringTable().add(r).first; 317 if (result == r) 318 r->setIsAtomic(true); 319 return result; 320} 321 322AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash) 323{ 324 ASSERT(s); 325 ASSERT(existingHash); 326 327 if (!length) 328 return static_cast<AtomicStringImpl*>(StringImpl::empty()); 329 330 HashAndCharacters buffer = { existingHash, s, length }; 331 HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer); 332 if (iterator == stringTable().end()) 333 return 0; 334 return static_cast<AtomicStringImpl*>(*iterator); 335} 336 337void AtomicString::remove(StringImpl* r) 338{ 339 stringTable().remove(r); 340} 341 342AtomicString AtomicString::lower() const 343{ 344 // Note: This is a hot function in the Dromaeo benchmark. 345 StringImpl* impl = this->impl(); 346 if (UNLIKELY(!impl)) 347 return *this; 348 RefPtr<StringImpl> newImpl = impl->lower(); 349 if (LIKELY(newImpl == impl)) 350 return *this; 351 return AtomicString(newImpl); 352} 353 354AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd) 355{ 356 HashAndUTF8Characters buffer; 357 buffer.characters = charactersStart; 358 buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length); 359 360 if (!buffer.hash) 361 return nullAtom; 362 363 AtomicString atomicString; 364 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 365 return atomicString; 366} 367 368} // namespace WTF 369