/*
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
 * Copyright (C) 2012 Google Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "AtomicString.h"

#include "StringHash.h"
#include "wtf/HashSet.h"
#include "wtf/WTFThreadData.h"
#include "wtf/unicode/UTF8.h"

// Implementation of the AtomicString table: a HashSet<StringImpl*> reached
// through WTFThreadData, plus the hash translators that let the various
// AtomicString::add() overloads insert from raw character buffers.

namespace WTF {

using namespace Unicode;

// AtomicString is required to have exactly the same size as String.
COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);

// Owns the HashSet<StringImpl*> that holds the strings flagged as atomic.
// Instances are created through create(), which also registers destroy()
// on the WTFThreadData that stores the table.
class AtomicStringTable {
public:
    // Allocates a new table, stores it and its destructor callback on |data|,
    // and returns the new table.
    static AtomicStringTable* create(WTFThreadData& data)
    {
        data.m_atomicStringTable = new AtomicStringTable;
        data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
        return data.m_atomicStringTable;
    }

    // Gives mutable access to the underlying set.
    HashSet<StringImpl*>& table()
    {
        return m_table;
    }

private:
    // Clears the atomic flag on every string still in the table, then deletes
    // the table itself. Installed as m_atomicStringTableDestructor by create().
    static void destroy(AtomicStringTable* table)
    {
        HashSet<StringImpl*>::iterator end = table->m_table.end();
        for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
            (*iter)->setIsAtomic(false);
        delete table;
    }

    HashSet<StringImpl*> m_table;
};

// Returns the set of the AtomicStringTable held by wtfThreadData(), creating
// the table on first use.
static inline HashSet<StringImpl*>& stringTable()
{
    // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
    WTFThreadData& data = wtfThreadData();
    AtomicStringTable* table = data.atomicStringTable();
    if (UNLIKELY(!table))
        table = AtomicStringTable::create(data);
    return table->table();
}

// Inserts |value| into the string table using |HashTranslator| and returns the
// resulting table entry. HashTranslator is expected to provide hash(), equal()
// and translate() in the shape of the translator structs below.
template<typename T, typename HashTranslator>
static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
{
    HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value);

    // If the string is newly-translated, then we need to adopt it.
    // The boolean in the pair tells us if that is so.
    // (The translate() functions in this file hand over their reference with
    // leakRef(); adoptRef() picks it up here.)
    return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
}

// Translator for an LChar string passed as a bare pointer (no explicit length).
struct CStringTranslator {
    static unsigned hash(const LChar* c)
    {
        return StringHasher::computeHashAndMaskTop8Bits(c);
    }

    static inline bool equal(StringImpl* r, const LChar* s)
    {
        return WTF::equal(r, s);
    }

    // Creates the StringImpl for a new entry, caches |hash| on it and flags it atomic.
    static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
    {
        location = StringImpl::create(c).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Adds the LChar string |c| to the table.
// Returns 0 when |c| is null and StringImpl::empty() when its first character is 0.
PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
{
    if (!c)
        return 0;
    if (!*c)
        return StringImpl::empty();

    return addToStringTable<const LChar*, CStringTranslator>(c);
}

// Pointer-plus-length view of a character buffer, used as a lookup key.
template<typename CharacterType>
struct HashTranslatorCharBuffer {
    const CharacterType* s;
    unsigned length;
};

typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
// Translator for a UChar buffer whose hash has to be computed from its characters.
struct UCharBufferTranslator {
    static unsigned hash(const UCharBuffer& buf)
    {
        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    }

    static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    {
        return WTF::equal(str, buf.s, buf.length);
    }

    // New entries are built with StringImpl::create8BitIfPossible().
    static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    {
        location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Character buffer paired with a hash the caller has already computed.
template<typename CharacterType>
struct HashAndCharacters {
    unsigned hash;
    const CharacterType* characters;
    unsigned length;
};

// Translator for HashAndCharacters: reuses the supplied hash instead of rehashing.
template<typename CharacterType>
struct HashAndCharactersTranslator {
    static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
    {
        // Check that the supplied hash matches what hashing the characters would give.
        ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
        return buffer.hash;
    }

    static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
    {
        return WTF::equal(string, buffer.characters, buffer.length);
    }

    static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
    {
        location = StringImpl::create(buffer.characters, buffer.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// UTF-8 byte buffer with its precomputed hash, its byte length (|length|) and
// the length it has once converted to UTF-16 (|utf16Length|).
struct HashAndUTF8Characters {
    unsigned hash;
    const char* characters;
    unsigned length;
    unsigned utf16Length;
};

// Translator that matches UTF-8 input against the stored 8-bit/16-bit strings.
struct HashAndUTF8CharactersTranslator {
    static unsigned hash(const HashAndUTF8Characters& buffer)
    {
        return buffer.hash;
    }

    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    {
        // A different UTF-16 length can never be an equal string.
        if (buffer.utf16Length != string->length())
            return false;

        // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
        if (buffer.utf16Length != buffer.length) {
            // Lengths differ, so hand the comparison to the UTF-8-aware helpers,
            // picking the one that matches the stored string's character width.
            if (string->is8Bit()) {
                const LChar* characters8 = string->characters8();
                return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
            }
            const UChar* characters16 = string->characters16();
            return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
        }

        // Equal lengths: compare element by element against the stored characters.
        if (string->is8Bit()) {
            const LChar* stringCharacters = string->characters8();

            for (unsigned i = 0; i < buffer.length; ++i) {
                ASSERT(isASCII(buffer.characters[i]));
                if (stringCharacters[i] != buffer.characters[i])
                    return false;
            }

            return true;
        }

        const UChar* stringCharacters = string->characters16();

        for (unsigned i = 0; i < buffer.length; ++i) {
            ASSERT(isASCII(buffer.characters[i]));
            if (stringCharacters[i] != buffer.characters[i])
                return false;
        }

        return true;
    }

    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    {
        // Convert the UTF-8 bytes into a freshly allocated UChar buffer of
        // utf16Length elements.
        UChar* target;
        RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);

        bool isAllASCII;
        const char* source = buffer.characters;
        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
            ASSERT_NOT_REACHED();

        // When the conversion reports pure ASCII, the converted buffer is dropped
        // and the string is created directly from the source bytes instead.
        // NOTE(review): presumably this yields an 8-bit StringImpl - confirm
        // against StringImpl::create(const char*, unsigned).
        if (isAllASCII)
            newString = StringImpl::create(buffer.characters, buffer.length);

        location = newString.release().leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Adds |length| UChars starting at |s|.
// Returns 0 when |s| is null and StringImpl::empty() when |length| is 0.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
{
    if (!s)
        return 0;

    if (!length)
        return StringImpl::empty();

    UCharBuffer buffer = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}

// Same as add(const UChar*, unsigned), but reuses the caller's |existingHash|.
// |s| must be non-null and |existingHash| non-zero (both asserted).
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
{
    ASSERT(s);
    ASSERT(existingHash);

    if (!length)
        return StringImpl::empty();

    HashAndCharacters<UChar> buffer = { existingHash, s, length };
    return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
}

// Adds the 0-terminated UChar string |s|.
// Returns 0 when |s| is null and StringImpl::empty() when it has no characters.
PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
{
    if (!s)
        return 0;

    // Find the length by scanning for the terminating 0.
    unsigned length = 0;
    while (s[length] != UChar(0))
        ++length;

    if (!length)
        return StringImpl::empty();

    UCharBuffer buffer = { s, length };
    return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
}

// Identifies |length| characters of |baseString| beginning at index |start|.
struct SubstringLocation {
    StringImpl* baseString;
    unsigned start;
    unsigned length;
};

// Translator that hashes and compares a substring in place, using the 8-bit or
// 16-bit accessor according to baseString->is8Bit().
struct SubstringTranslator {
    static unsigned hash(const SubstringLocation& buffer)
    {
        if (buffer.baseString->is8Bit())
            return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
        return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
    }

    static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
    {
        if (buffer.baseString->is8Bit())
            return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
        return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
    }

    // The new entry is produced by baseString->substring(start, length).
    static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
    {
        location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Adds the substring [start, start + length) of |baseString|.
// Returns 0 for a null |baseString| and StringImpl::empty() when |length| is 0
// or |start| is at or past the end of the string.
PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
{
    if (!baseString)
        return 0;

    if (!length || start >= baseString->length())
        return StringImpl::empty();

    // Clamp |length| to what remains after |start|; a request that covers the
    // entire string is forwarded to add(baseString).
    unsigned maxLength = baseString->length() - start;
    if (length >= maxLength) {
        if (!start)
            return add(baseString);
        length = maxLength;
    }

    SubstringLocation buffer = { baseString, start, length };
    return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
}

typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
// Translator for an LChar buffer with an explicit length.
struct LCharBufferTranslator {
    static unsigned hash(const LCharBuffer& buf)
    {
        return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    }

    static bool equal(StringImpl* const& str, const LCharBuffer& buf)
    {
        return WTF::equal(str, buf.s, buf.length);
    }

    static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
    {
        location = StringImpl::create(buf.s, buf.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

typedef HashTranslatorCharBuffer<char> CharBuffer;
// Translator for a char buffer, used by AtomicString::addFromLiteralData().
struct CharBufferFromLiteralDataTranslator {
    static unsigned hash(const CharBuffer& buf)
    {
        // The bytes are hashed as LChar.
        return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
    }

    static bool equal(StringImpl* const& str, const CharBuffer& buf)
    {
        return WTF::equal(str, buf.s, buf.length);
    }

    static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
    {
        location = StringImpl::create(buf.s, buf.length).leakRef();
        location->setHash(hash);
        location->setIsAtomic(true);
    }
};

// Adds |length| LChars starting at |s|.
// Returns 0 when |s| is null and StringImpl::empty() when |length| is 0.
PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
{
    if (!s)
        return 0;

    if (!length)
        return StringImpl::empty();

    LCharBuffer buffer = { s, length };
    return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
}

// Adds |length| chars starting at |characters|.
// |characters| must be non-null and |length| non-zero (both asserted).
PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
{
    ASSERT(characters);
    ASSERT(length);

    CharBuffer buffer = { characters, length };
    return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
}

// Adds the existing StringImpl |r| to the table and returns the table entry,
// or StringImpl::empty() when |r| has length 0.
PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
{
    if (!r->length())
        return StringImpl::empty();

    StringImpl* result = *stringTable().add(r).iterator;
    // The table entry is |r| itself, so flag |r| as atomic.
    if (result == r)
        r->setIsAtomic(true);
    ASSERT(!r->isStatic() || result->isStatic());
    return result;
}

// Looks |stringImpl| up in the table by its existing hash and its characters
// read as |CharacterType|; returns the table iterator from find().
template<typename CharacterType>
static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
{
    HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
    return stringTable().find<HashAndCharactersTranslator<CharacterType> >(buffer);
}

// Returns the table entry equal to |stringImpl|, or 0 when there is none.
// |stringImpl| must be non-null and have a non-zero existing hash (both
// asserted). A zero-length string yields StringImpl::empty().
StringImpl* AtomicString::find(const StringImpl* stringImpl)
{
    ASSERT(stringImpl);
    ASSERT(stringImpl->existingHash());

    if (!stringImpl->length())
        return StringImpl::empty();

    HashSet<StringImpl*>::iterator iterator;
    if (stringImpl->is8Bit())
        iterator = findString<LChar>(stringImpl);
    else
        iterator = findString<UChar>(stringImpl);
    if (iterator == stringTable().end())
        return 0;
    return *iterator;
}

// Removes the table entry matching |r|. RELEASE_ASSERTs that such an entry exists.
void AtomicString::remove(StringImpl* r)
{
    HashSet<StringImpl*>::iterator iterator;
    if (r->is8Bit())
        iterator = findString<LChar>(r);
    else
        iterator = findString<UChar>(r);
    RELEASE_ASSERT(iterator != stringTable().end());
    stringTable().remove(iterator);
}

// Returns the result of StringImpl::lower() as an AtomicString. *this is
// returned unchanged when it has no impl or when lower() hands back the same impl.
AtomicString AtomicString::lower() const
{
    // Note: This is a hot function in the Dromaeo benchmark.
    StringImpl* impl = this->impl();
    if (UNLIKELY(!impl))
        return *this;
    RefPtr<StringImpl> newImpl = impl->lower();
    if (LIKELY(newImpl == impl))
        return *this;
    return AtomicString(newImpl);
}

// Builds an AtomicString from the UTF-8 bytes in [charactersStart, charactersEnd).
// Returns nullAtom when the computed hash is 0.
AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
{
    HashAndUTF8Characters buffer;
    buffer.characters = charactersStart;
    // Fills in buffer.length and buffer.utf16Length alongside the hash.
    buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);

    // NOTE(review): a zero hash presumably signals input that could not be
    // hashed as UTF-8 - confirm against the helper's contract.
    if (!buffer.hash)
        return nullAtom;

    AtomicString atomicString;
    atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    return atomicString;
}

#ifndef NDEBUG
// Forwards to m_string.show(); only compiled when NDEBUG is not defined.
void AtomicString::show() const
{
    m_string.show();
}
#endif

} // namespace WTF