1d0825bca7fe65beaee391d30da42e937db621564Steve Block/* 2d0825bca7fe65beaee391d30da42e937db621564Steve Block * Copyright (C) 2005, 2006, 2008, 2010 Apple Inc. All rights reserved. 3bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4d0825bca7fe65beaee391d30da42e937db621564Steve Block * 5d0825bca7fe65beaee391d30da42e937db621564Steve Block * This library is free software; you can redistribute it and/or 6d0825bca7fe65beaee391d30da42e937db621564Steve Block * modify it under the terms of the GNU Library General Public 7d0825bca7fe65beaee391d30da42e937db621564Steve Block * License as published by the Free Software Foundation; either 8d0825bca7fe65beaee391d30da42e937db621564Steve Block * version 2 of the License, or (at your option) any later version. 9d0825bca7fe65beaee391d30da42e937db621564Steve Block * 10d0825bca7fe65beaee391d30da42e937db621564Steve Block * This library is distributed in the hope that it will be useful, 11d0825bca7fe65beaee391d30da42e937db621564Steve Block * but WITHOUT ANY WARRANTY; without even the implied warranty of 12d0825bca7fe65beaee391d30da42e937db621564Steve Block * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13d0825bca7fe65beaee391d30da42e937db621564Steve Block * Library General Public License for more details. 14d0825bca7fe65beaee391d30da42e937db621564Steve Block * 15d0825bca7fe65beaee391d30da42e937db621564Steve Block * You should have received a copy of the GNU Library General Public License 16d0825bca7fe65beaee391d30da42e937db621564Steve Block * along with this library; see the file COPYING.LIB. If not, write to 17d0825bca7fe65beaee391d30da42e937db621564Steve Block * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18d0825bca7fe65beaee391d30da42e937db621564Steve Block * Boston, MA 02110-1301, USA. 19d0825bca7fe65beaee391d30da42e937db621564Steve Block * 20d0825bca7fe65beaee391d30da42e937db621564Steve Block */ 21a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch#ifndef WTF_StringHasher_h 22a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch#define WTF_StringHasher_h 23d0825bca7fe65beaee391d30da42e937db621564Steve Block 24d0825bca7fe65beaee391d30da42e937db621564Steve Block#include <wtf/unicode/Unicode.h> 25d0825bca7fe65beaee391d30da42e937db621564Steve Block 26d0825bca7fe65beaee391d30da42e937db621564Steve Blocknamespace WTF { 27d0825bca7fe65beaee391d30da42e937db621564Steve Block 28d0825bca7fe65beaee391d30da42e937db621564Steve Block// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's 29d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic const unsigned stringHashingStartValue = 0x9e3779b9U; 30d0825bca7fe65beaee391d30da42e937db621564Steve Block 31bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen// Paul Hsieh's SuperFastHash 32d0825bca7fe65beaee391d30da42e937db621564Steve Block// http://www.azillionmonkeys.com/qed/hash.html 33d0825bca7fe65beaee391d30da42e937db621564Steve Block// char* data is interpreted as latin-encoded (zero extended to 16 bits). 34bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsenclass StringHasher { 35bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsenpublic: 36bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen inline StringHasher() 37bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen : m_hash(stringHashingStartValue) 38bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen , m_hasPendingCharacter(false) 39bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen , m_pendingCharacter(0) 40bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 41bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 42d0825bca7fe65beaee391d30da42e937db621564Steve Block 43bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen inline void addCharacters(UChar a, UChar b) 44bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 45bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen ASSERT(!m_hasPendingCharacter); 46bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen addCharactersToHash(a, b); 47d0825bca7fe65beaee391d30da42e937db621564Steve Block } 48d0825bca7fe65beaee391d30da42e937db621564Steve Block 49bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen inline void addCharacter(UChar ch) 50bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 51bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (m_hasPendingCharacter) { 52bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen addCharactersToHash(m_pendingCharacter, ch); 53bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_hasPendingCharacter = false; 54bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return; 55bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 56bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 57bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_pendingCharacter = ch; 58bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_hasPendingCharacter = true; 59d0825bca7fe65beaee391d30da42e937db621564Steve Block } 60d0825bca7fe65beaee391d30da42e937db621564Steve Block 61bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen inline unsigned hash() const 62bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 63bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen unsigned result = m_hash; 64bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 65bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // Handle end case. 66bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (m_hasPendingCharacter) { 67bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result += m_pendingCharacter; 68bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result ^= result << 11; 69bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result += result >> 17; 70bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 71bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 72bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // Force "avalanching" of final 31 bits. 73bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result ^= result << 3; 74bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result += result >> 5; 75bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result ^= result << 2; 76bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result += result >> 15; 77bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result ^= result << 10; 78bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 79bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // First bit is used in UStringImpl for m_isIdentifier. 80bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen result &= 0x7fffffff; 81bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 82bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // This avoids ever returning a hash code of 0, since that is used to 83bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // signal "hash not computed yet", using a value that is likely to be 84bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen // effectively the same as 0 when the low bits are masked. 85bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (!result) 86bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return 0x40000000; 87bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 88bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return result; 89d0825bca7fe65beaee391d30da42e937db621564Steve Block } 90d0825bca7fe65beaee391d30da42e937db621564Steve Block 912bde8e466a4451c7319e3a072d118917957d6554Steve Block template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data, unsigned length) 92bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 93bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen StringHasher hasher; 94bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen bool rem = length & 1; 95bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen length >>= 1; 96bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 97bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen while (length--) { 98bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen hasher.addCharacters(Converter(data[0]), Converter(data[1])); 99bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen data += 2; 100bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 101bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 102bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (rem) 103bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen hasher.addCharacter(Converter(*data)); 104bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 105bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return hasher.hash(); 106d0825bca7fe65beaee391d30da42e937db621564Steve Block } 107d0825bca7fe65beaee391d30da42e937db621564Steve Block 1082bde8e466a4451c7319e3a072d118917957d6554Steve Block template<typename T, UChar Converter(T)> static inline unsigned computeHash(const T* data) 109bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 110bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen StringHasher hasher; 111bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 112bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen while (true) { 113bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen UChar b0 = Converter(*data++); 114bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (!b0) 115bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen break; 116bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen UChar b1 = Converter(*data++); 117bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen if (!b1) { 118bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen hasher.addCharacter(b0); 119bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen break; 120bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 121bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 122bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen hasher.addCharacters(b0, b1); 123d0825bca7fe65beaee391d30da42e937db621564Steve Block } 124bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 125bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return hasher.hash(); 126bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 127bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 1282bde8e466a4451c7319e3a072d118917957d6554Steve Block template<typename T> static inline unsigned computeHash(const T* data, unsigned length) 129bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 1302bde8e466a4451c7319e3a072d118917957d6554Steve Block return computeHash<T, defaultCoverter>(data, length); 131d0825bca7fe65beaee391d30da42e937db621564Steve Block } 132d0825bca7fe65beaee391d30da42e937db621564Steve Block 1332bde8e466a4451c7319e3a072d118917957d6554Steve Block template<typename T> static inline unsigned computeHash(const T* data) 134bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 1352bde8e466a4451c7319e3a072d118917957d6554Steve Block return computeHash<T, defaultCoverter>(data); 136bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 137d0825bca7fe65beaee391d30da42e937db621564Steve Block 1382bde8e466a4451c7319e3a072d118917957d6554Steve Block template<size_t length> static inline unsigned hashMemory(const void* data) 139bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 140bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen COMPILE_ASSERT(!(length % 4), length_must_be_a_multible_of_four); 1412bde8e466a4451c7319e3a072d118917957d6554Steve Block return computeHash<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar)); 142bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 143d0825bca7fe65beaee391d30da42e937db621564Steve Block 1442bde8e466a4451c7319e3a072d118917957d6554Steve Block static inline unsigned hashMemory(const void* data, unsigned size) 1452fc2651226baac27029e38c9d6ef883fa32084dbSteve Block { 1462fc2651226baac27029e38c9d6ef883fa32084dbSteve Block ASSERT(!(size % 2)); 1472bde8e466a4451c7319e3a072d118917957d6554Steve Block return computeHash<UChar>(static_cast<const UChar*>(data), size / sizeof(UChar)); 1482fc2651226baac27029e38c9d6ef883fa32084dbSteve Block } 1492fc2651226baac27029e38c9d6ef883fa32084dbSteve Block 150bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsenprivate: 151bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen static inline UChar defaultCoverter(UChar ch) 152bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 153bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return ch; 154bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 155bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 156bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen static inline UChar defaultCoverter(char ch) 157bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 158bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen return static_cast<unsigned char>(ch); 159bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 160bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen 161bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen inline void addCharactersToHash(UChar a, UChar b) 162bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen { 163bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_hash += a; 164bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen unsigned tmp = (b << 11) ^ m_hash; 165bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_hash = (m_hash << 16) ^ tmp; 166bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen m_hash += m_hash >> 11; 167bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen } 168d0825bca7fe65beaee391d30da42e937db621564Steve Block 169bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen unsigned m_hash; 170bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen bool m_hasPendingCharacter; 171bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen UChar m_pendingCharacter; 172bec39347bb3bb5bf1187ccaf471d26247f28b585Kristian Monsen}; 173d0825bca7fe65beaee391d30da42e937db621564Steve Block 174d0825bca7fe65beaee391d30da42e937db621564Steve Block} // namespace WTF 175d0825bca7fe65beaee391d30da42e937db621564Steve Block 1762bde8e466a4451c7319e3a072d118917957d6554Steve Blockusing WTF::StringHasher; 1772bde8e466a4451c7319e3a072d118917957d6554Steve Block 178a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch#endif // WTF_StringHasher_h 179