1# Copyright (C) 2013 Google, Inc.
2#
3# This library is free software; you can redistribute it and/or
4# modify it under the terms of the GNU Library General Public
5# License as published by the Free Software Foundation; either
6# version 2 of the License, or (at your option) any later version.
7#
8# This library is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11# Library General Public License for more details.
12#
13# You should have received a copy of the GNU Library General Public License
14# along with this library; see the file COPYING.LIB.  If not, write to
15# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16# Boston, MA 02110-1301, USA.
17
18# This implementaiton of SuperFastHash is based on the Python implementation
19# by Victor Perron at <https://github.com/vperron/python-superfasthash>.
20# We've modified Victor's version to output hash values that match WTFString,
21# which involves using a specific seed and some different constants.
22
23class uint32_t(long):
24    def __rshift__(self, other):
25        return uint32_t(long.__rshift__(self, other) & ((1L << 32) - 1))
26
27    def __lshift__(self, other):
28        return uint32_t(long.__lshift__(self, other) & ((1L << 32) - 1))
29
30    def __add__(self, other):
31        return uint32_t(long.__add__(self, other) & ((1L << 32) - 1))
32
33    def __xor__(self, other):
34        return uint32_t(long.__xor__(self, other) & ((1L << 32) - 1))
35
36
37def hash(string):
38    """
39    Stream-adapted SuperFastHash algorithm from Paul Hsieh,
40    http://www.azillionmonkeys.com/qed/hash.html
41    LGPLv2.1
42    Python version with no dependencies.
43    Victor Perron <victor@iso3103.net>
44    """
45
46    if not string:
47        return 0
48
49    result = uint32_t(0x9E3779B9L)
50    length = len(string)
51    remainder = length & 1
52    length >>= 1
53
54    i = 0
55    while length > 0:
56        length -= 1
57        result += ord(string[i])
58        temp = (ord(string[i + 1]) << 11) ^ result
59        result = (result << 16) ^ temp
60        i += 2
61        result += (result >> 11)
62
63    if remainder == 1:
64        result += ord(string[i])
65        result ^= (result << 11)
66        result += (result >> 17)
67
68    # Force "avalanching" of final 127 bits
69    result ^= (result << 3)
70    result += (result >> 5)
71    result ^= (result << 2)
72    result += (result >> 15)
73    result ^= (result << 10)
74
75    # Save 8 bits for StringImpl to use as flags.
76    result &= 0xffffff
77
78    # This avoids ever returning a hash code of 0, since that is used to
79    # signal "hash not computed yet".
80    assert result != 0
81
82    return result
83