AtomicString.cpp revision cad810f21b803229eb11403f9209855525a25d57
1/* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22#include "config.h" 23 24#include "AtomicString.h" 25 26#include "StringHash.h" 27#include <wtf/HashSet.h> 28#include <wtf/Threading.h> 29#include <wtf/WTFThreadData.h> 30#include <wtf/unicode/UTF8.h> 31 32namespace WTF { 33 34using namespace Unicode; 35 36COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); 37 38class AtomicStringTable { 39public: 40 static AtomicStringTable* create() 41 { 42 AtomicStringTable* table = new AtomicStringTable; 43 44 WTFThreadData& data = wtfThreadData(); 45 data.m_atomicStringTable = table; 46 data.m_atomicStringTableDestructor = AtomicStringTable::destroy; 47 48 return table; 49 } 50 51 HashSet<StringImpl*>& table() 52 { 53 return m_table; 54 } 55 56private: 57 static void destroy(AtomicStringTable* table) 58 { 59 HashSet<StringImpl*>::iterator end = table->m_table.end(); 60 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter) 61 (*iter)->setIsAtomic(false); 62 delete table; 63 } 64 65 HashSet<StringImpl*> m_table; 66}; 67 68static inline HashSet<StringImpl*>& stringTable() 69{ 70 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor). 71 AtomicStringTable* table = wtfThreadData().atomicStringTable(); 72 if (UNLIKELY(!table)) 73 table = AtomicStringTable::create(); 74 return table->table(); 75} 76 77struct CStringTranslator { 78 static unsigned hash(const char* c) 79 { 80 return StringImpl::computeHash(c); 81 } 82 83 static bool equal(StringImpl* r, const char* s) 84 { 85 int length = r->length(); 86 const UChar* d = r->characters(); 87 for (int i = 0; i != length; ++i) { 88 unsigned char c = s[i]; 89 if (d[i] != c) 90 return false; 91 } 92 return !s[length]; 93 } 94 95 static void translate(StringImpl*& location, const char* const& c, unsigned hash) 96 { 97 location = StringImpl::create(c).leakRef(); 98 location->setHash(hash); 99 location->setIsAtomic(true); 100 } 101}; 102 103bool operator==(const AtomicString& a, const char* b) 104{ 105 StringImpl* impl = a.impl(); 106 if ((!impl || !impl->characters()) && !b) 107 return true; 108 if ((!impl || !impl->characters()) || !b) 109 return false; 110 return CStringTranslator::equal(impl, b); 111} 112 113PassRefPtr<StringImpl> AtomicString::add(const char* c) 114{ 115 if (!c) 116 return 0; 117 if (!*c) 118 return StringImpl::empty(); 119 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<const char*, CStringTranslator>(c); 120 if (!addResult.second) 121 return *addResult.first; 122 return adoptRef(*addResult.first); 123} 124 125struct UCharBuffer { 126 const UChar* s; 127 unsigned length; 128}; 129 130static inline bool equal(StringImpl* string, const UChar* characters, unsigned length) 131{ 132 if (string->length() != length) 133 return false; 134 135 // FIXME: perhaps we should have a more abstract macro that indicates when 136 // going 4 bytes at a time is unsafe 137#if CPU(ARM) || CPU(SH4) || CPU(MIPS) 138 const UChar* stringCharacters = string->characters(); 139 for (unsigned i = 0; i != length; ++i) { 140 if (*stringCharacters++ != *characters++) 141 return false; 142 } 143 return true; 144#else 145 /* Do it 4-bytes-at-a-time on architectures where it's safe */ 146 147 const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters()); 148 const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters); 149 150 unsigned halfLength = length >> 1; 151 for (unsigned i = 0; i != halfLength; ++i) { 152 if (*stringCharacters++ != *bufferCharacters++) 153 return false; 154 } 155 156 if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters)) 157 return false; 158 159 return true; 160#endif 161} 162 163bool operator==(const AtomicString& string, const Vector<UChar>& vector) 164{ 165 return string.impl() && equal(string.impl(), vector.data(), vector.size()); 166} 167 168struct UCharBufferTranslator { 169 static unsigned hash(const UCharBuffer& buf) 170 { 171 return StringImpl::computeHash(buf.s, buf.length); 172 } 173 174 static bool equal(StringImpl* const& str, const UCharBuffer& buf) 175 { 176 return WTF::equal(str, buf.s, buf.length); 177 } 178 179 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash) 180 { 181 location = StringImpl::create(buf.s, buf.length).leakRef(); 182 location->setHash(hash); 183 location->setIsAtomic(true); 184 } 185}; 186 187struct HashAndCharacters { 188 unsigned hash; 189 const UChar* characters; 190 unsigned length; 191}; 192 193struct HashAndCharactersTranslator { 194 static unsigned hash(const HashAndCharacters& buffer) 195 { 196 ASSERT(buffer.hash == StringImpl::computeHash(buffer.characters, buffer.length)); 197 return buffer.hash; 198 } 199 200 static bool equal(StringImpl* const& string, const HashAndCharacters& buffer) 201 { 202 return WTF::equal(string, buffer.characters, buffer.length); 203 } 204 205 static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash) 206 { 207 location = StringImpl::create(buffer.characters, buffer.length).leakRef(); 208 location->setHash(hash); 209 location->setIsAtomic(true); 210 } 211}; 212 213struct HashAndUTF8Characters { 214 unsigned hash; 215 const char* characters; 216 unsigned length; 217 unsigned utf16Length; 218}; 219 220struct HashAndUTF8CharactersTranslator { 221 static unsigned hash(const HashAndUTF8Characters& buffer) 222 { 223 return buffer.hash; 224 } 225 226 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer) 227 { 228 if (buffer.utf16Length != string->length()) 229 return false; 230 231 const UChar* stringCharacters = string->characters(); 232 233 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same. 234 if (buffer.utf16Length != buffer.length) 235 return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length); 236 237 for (unsigned i = 0; i < buffer.length; ++i) { 238 ASSERT(isASCII(buffer.characters[i])); 239 if (stringCharacters[i] != buffer.characters[i]) 240 return false; 241 } 242 243 return true; 244 } 245 246 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash) 247 { 248 UChar* target; 249 location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef(); 250 251 const char* source = buffer.characters; 252 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK) 253 ASSERT_NOT_REACHED(); 254 255 location->setHash(hash); 256 location->setIsAtomic(true); 257 } 258}; 259 260PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length) 261{ 262 if (!s) 263 return 0; 264 265 if (!length) 266 return StringImpl::empty(); 267 268 UCharBuffer buf = { s, length }; 269 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); 270 271 // If the string is newly-translated, then we need to adopt it. 272 // The boolean in the pair tells us if that is so. 273 return addResult.second ? adoptRef(*addResult.first) : *addResult.first; 274} 275 276PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash) 277{ 278 ASSERT(s); 279 ASSERT(existingHash); 280 281 if (!length) 282 return StringImpl::empty(); 283 284 HashAndCharacters buffer = { existingHash, s, length }; 285 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndCharacters, HashAndCharactersTranslator>(buffer); 286 if (!addResult.second) 287 return *addResult.first; 288 return adoptRef(*addResult.first); 289} 290 291PassRefPtr<StringImpl> AtomicString::add(const UChar* s) 292{ 293 if (!s) 294 return 0; 295 296 int length = 0; 297 while (s[length] != UChar(0)) 298 length++; 299 300 if (!length) 301 return StringImpl::empty(); 302 303 UCharBuffer buf = {s, length}; 304 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<UCharBuffer, UCharBufferTranslator>(buf); 305 306 // If the string is newly-translated, then we need to adopt it. 307 // The boolean in the pair tells us if that is so. 308 return addResult.second ? adoptRef(*addResult.first) : *addResult.first; 309} 310 311PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r) 312{ 313 if (!r || r->isAtomic()) 314 return r; 315 316 if (!r->length()) 317 return StringImpl::empty(); 318 319 StringImpl* result = *stringTable().add(r).first; 320 if (result == r) 321 r->setIsAtomic(true); 322 return result; 323} 324 325AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash) 326{ 327 ASSERT(s); 328 ASSERT(existingHash); 329 330 if (!length) 331 return static_cast<AtomicStringImpl*>(StringImpl::empty()); 332 333 HashAndCharacters buffer = { existingHash, s, length }; 334 HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer); 335 if (iterator == stringTable().end()) 336 return 0; 337 return static_cast<AtomicStringImpl*>(*iterator); 338} 339 340void AtomicString::remove(StringImpl* r) 341{ 342 stringTable().remove(r); 343} 344 345AtomicString AtomicString::lower() const 346{ 347 // Note: This is a hot function in the Dromaeo benchmark. 348 StringImpl* impl = this->impl(); 349 if (UNLIKELY(!impl)) 350 return *this; 351 RefPtr<StringImpl> newImpl = impl->lower(); 352 if (LIKELY(newImpl == impl)) 353 return *this; 354 return AtomicString(newImpl); 355} 356 357AtomicString AtomicString::fromUTF8(const char* characters, size_t length) 358{ 359 if (!characters) 360 return AtomicString(); 361 362 if (!length) 363 return emptyAtom; 364 365 HashAndUTF8Characters buffer; 366 buffer.characters = characters; 367 buffer.length = length; 368 buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length); 369 370 if (!buffer.hash) 371 return AtomicString(); 372 373 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer); 374 375 // If the string is newly-translated, then we need to adopt it. 376 // The boolean in the pair tells us if that is so. 377 AtomicString atomicString; 378 atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first; 379 return atomicString; 380} 381 382AtomicString AtomicString::fromUTF8(const char* characters) 383{ 384 if (!characters) 385 return AtomicString(); 386 return fromUTF8(characters, strlen(characters)); 387} 388 389} // namespace WTF 390