1/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ANDROID_BASIC_HASHTABLE_H
18#define ANDROID_BASIC_HASHTABLE_H
19
20#include <stdint.h>
21#include <sys/types.h>
22#include <utils/SharedBuffer.h>
23#include <utils/TypeHelpers.h>
24
25namespace android {
26
27/* Implementation type.  Nothing to see here. */
28class BasicHashtableImpl {
29protected:
30    struct Bucket {
31        // The collision flag indicates that the bucket is part of a collision chain
32        // such that at least two entries both hash to this bucket.  When true, we
33        // may need to seek further along the chain to find the entry.
34        static const uint32_t COLLISION = 0x80000000UL;
35
36        // The present flag indicates that the bucket contains an initialized entry value.
37        static const uint32_t PRESENT   = 0x40000000UL;
38
39        // Mask for 30 bits worth of the hash code that are stored within the bucket to
40        // speed up lookups and rehashing by eliminating the need to recalculate the
41        // hash code of the entry's key.
42        static const uint32_t HASH_MASK = 0x3fffffffUL;
43
44        // Combined value that stores the collision and present flags as well as
45        // a 30 bit hash code.
46        uint32_t cookie;
47
48        // Storage for the entry begins here.
49        char entry[0];
50    };
51
52    BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
53            size_t minimumInitialCapacity, float loadFactor);
54    BasicHashtableImpl(const BasicHashtableImpl& other);
55    virtual ~BasicHashtableImpl();
56
57    void dispose();
58
59    inline void edit() {
60        if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
61            clone();
62        }
63    }
64
65    void setTo(const BasicHashtableImpl& other);
66    void clear();
67
68    ssize_t next(ssize_t index) const;
69    ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
70    size_t add(hash_t hash, const void* __restrict__ entry);
71    void removeAt(size_t index);
72    void rehash(size_t minimumCapacity, float loadFactor);
73
74    const size_t mBucketSize; // number of bytes per bucket including the entry
75    const bool mHasTrivialDestructor; // true if the entry type does not require destruction
76    size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
77    float mLoadFactor;        // load factor
78    size_t mSize;             // number of elements actually in the table
79    size_t mFilledBuckets;    // number of buckets for which collision or present is true
80    size_t mBucketCount;      // number of slots in the mBuckets array
81    void* mBuckets;           // array of buckets, as a SharedBuffer
82
83    inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
84        return *reinterpret_cast<const Bucket*>(
85                static_cast<const uint8_t*>(buckets) + index * mBucketSize);
86    }
87
88    inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
89        return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
90    }
91
92    virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
93    virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
94    virtual void destroyBucketEntry(Bucket& bucket) const = 0;
95
96private:
97    void clone();
98
99    // Allocates a bucket array as a SharedBuffer.
100    void* allocateBuckets(size_t count) const;
101
102    // Releases a bucket array's associated SharedBuffer.
103    void releaseBuckets(void* __restrict__ buckets, size_t count) const;
104
105    // Destroys the contents of buckets (invokes destroyBucketEntry for each
106    // populated bucket if needed).
107    void destroyBuckets(void* __restrict__ buckets, size_t count) const;
108
109    // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
110    // for each populated bucket if needed).
111    void copyBuckets(const void* __restrict__ fromBuckets,
112            void* __restrict__ toBuckets, size_t count) const;
113
114    // Determines the appropriate size of a bucket array to store a certain minimum
115    // number of entries and returns its effective capacity.
116    static void determineCapacity(size_t minimumCapacity, float loadFactor,
117            size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
118
119    // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
120    inline static hash_t trimHash(hash_t hash) {
121        return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
122    }
123
124    // Returns the index of the first bucket that is in the collision chain
125    // for the specified hash code, given the total number of buckets.
126    // (Primary hash)
127    inline static size_t chainStart(hash_t hash, size_t count) {
128        return hash % count;
129    }
130
131    // Returns the increment to add to a bucket index to seek to the next bucket
132    // in the collision chain for the specified hash code, given the total number of buckets.
133    // (Secondary hash)
134    inline static size_t chainIncrement(hash_t hash, size_t count) {
135        return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
136    }
137
138    // Returns the index of the next bucket that is in the collision chain
139    // that is defined by the specified increment, given the total number of buckets.
140    inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
141        return (index + increment) % count;
142    }
143};
144
145/*
146 * A BasicHashtable stores entries that are indexed by hash code in place
147 * within an array.  The basic operations are finding entries by key,
148 * adding new entries and removing existing entries.
149 *
150 * This class provides a very limited set of operations with simple semantics.
151 * It is intended to be used as a building block to construct more complex
152 * and interesting data structures such as HashMap.  Think very hard before
153 * adding anything extra to BasicHashtable, it probably belongs at a
154 * higher level of abstraction.
155 *
156 * TKey: The key type.
157 * TEntry: The entry type which is what is actually stored in the array.
158 *
159 * TKey must support the following contract:
160 *     bool operator==(const TKey& other) const;  // return true if equal
161 *     bool operator!=(const TKey& other) const;  // return true if unequal
162 *
163 * TEntry must support the following contract:
164 *     const TKey& getKey() const;  // get the key from the entry
165 *
166 * This class supports storing entries with duplicate keys.  Of course, it can't
167 * tell them apart during removal so only the first entry will be removed.
168 * We do this because it means that operations like add() can't fail.
169 */
170template <typename TKey, typename TEntry>
171class BasicHashtable : private BasicHashtableImpl {
172public:
173    /* Creates a hashtable with the specified minimum initial capacity.
174     * The underlying array will be created when the first entry is added.
175     *
176     * minimumInitialCapacity: The minimum initial capacity for the hashtable.
177     *     Default is 0.
178     * loadFactor: The desired load factor for the hashtable, between 0 and 1.
179     *     Default is 0.75.
180     */
181    BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
182
183    /* Copies a hashtable.
184     * The underlying storage is shared copy-on-write.
185     */
186    BasicHashtable(const BasicHashtable& other);
187
188    /* Clears and destroys the hashtable.
189     */
190    virtual ~BasicHashtable();
191
192    /* Making this hashtable a copy of the other hashtable.
193     * The underlying storage is shared copy-on-write.
194     *
195     * other: The hashtable to copy.
196     */
197    inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
198        setTo(other);
199        return *this;
200    }
201
202    /* Returns the number of entries in the hashtable.
203     */
204    inline size_t size() const {
205        return mSize;
206    }
207
208    /* Returns the capacity of the hashtable, which is the number of elements that can
209     * added to the hashtable without requiring it to be grown.
210     */
211    inline size_t capacity() const {
212        return mCapacity;
213    }
214
215    /* Returns the number of buckets that the hashtable has, which is the size of its
216     * underlying array.
217     */
218    inline size_t bucketCount() const {
219        return mBucketCount;
220    }
221
222    /* Returns the load factor of the hashtable. */
223    inline float loadFactor() const {
224        return mLoadFactor;
225    };
226
227    /* Returns a const reference to the entry at the specified index.
228     *
229     * index:   The index of the entry to retrieve.  Must be a valid index within
230     *          the bounds of the hashtable.
231     */
232    inline const TEntry& entryAt(size_t index) const {
233        return entryFor(bucketAt(mBuckets, index));
234    }
235
236    /* Returns a non-const reference to the entry at the specified index.
237     *
238     * index: The index of the entry to edit.  Must be a valid index within
239     *        the bounds of the hashtable.
240     */
241    inline TEntry& editEntryAt(size_t index) {
242        edit();
243        return entryFor(bucketAt(mBuckets, index));
244    }
245
246    /* Clears the hashtable.
247     * All entries in the hashtable are destroyed immediately.
248     * If you need to do something special with the entries in the hashtable then iterate
249     * over them and do what you need before clearing the hashtable.
250     */
251    inline void clear() {
252        BasicHashtableImpl::clear();
253    }
254
255    /* Returns the index of the next entry in the hashtable given the index of a previous entry.
256     * If the given index is -1, then returns the index of the first entry in the hashtable,
257     * if there is one, or -1 otherwise.
258     * If the given index is not -1, then returns the index of the next entry in the hashtable,
259     * in strictly increasing order, or -1 if there are none left.
260     *
261     * index:   The index of the previous entry that was iterated, or -1 to begin
262     *          iteration at the beginning of the hashtable.
263     */
264    inline ssize_t next(ssize_t index) const {
265        return BasicHashtableImpl::next(index);
266    }
267
268    /* Finds the index of an entry with the specified key.
269     * If the given index is -1, then returns the index of the first matching entry,
270     * otherwise returns the index of the next matching entry.
271     * If the hashtable contains multiple entries with keys that match the requested
272     * key, then the sequence of entries returned is arbitrary.
273     * Returns -1 if no entry was found.
274     *
275     * index:   The index of the previous entry with the specified key, or -1 to
276     *          find the first matching entry.
277     * hash:    The hashcode of the key.
278     * key:     The key.
279     */
280    inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
281        return BasicHashtableImpl::find(index, hash, &key);
282    }
283
284    /* Adds the entry to the hashtable.
285     * Returns the index of the newly added entry.
286     * If an entry with the same key already exists, then a duplicate entry is added.
287     * If the entry will not fit, then the hashtable's capacity is increased and
288     * its contents are rehashed.  See rehash().
289     *
290     * hash:    The hashcode of the key.
291     * entry:   The entry to add.
292     */
293    inline size_t add(hash_t hash, const TEntry& entry) {
294        return BasicHashtableImpl::add(hash, &entry);
295    }
296
297    /* Removes the entry with the specified index from the hashtable.
298     * The entry is destroyed immediately.
299     * The index must be valid.
300     *
301     * The hashtable is not compacted after an item is removed, so it is legal
302     * to continue iterating over the hashtable using next() or find().
303     *
304     * index:   The index of the entry to remove.  Must be a valid index within the
305     *          bounds of the hashtable, and it must refer to an existing entry.
306     */
307    inline void removeAt(size_t index) {
308        BasicHashtableImpl::removeAt(index);
309    }
310
311    /* Rehashes the contents of the hashtable.
312     * Grows the hashtable to at least the specified minimum capacity or the
313     * current number of elements, whichever is larger.
314     *
315     * Rehashing causes all entries to be copied and the entry indices may change.
316     * Although the hash codes are cached by the hashtable, rehashing can be an
317     * expensive operation and should be avoided unless the hashtable's size
318     * needs to be changed.
319     *
320     * Rehashing is the only way to change the capacity or load factor of the
321     * hashtable once it has been created.  It can be used to compact the
322     * hashtable by choosing a minimum capacity that is smaller than the current
323     * capacity (such as 0).
324     *
325     * minimumCapacity: The desired minimum capacity after rehashing.
326     * loadFactor: The desired load factor after rehashing.
327     */
328    inline void rehash(size_t minimumCapacity, float loadFactor) {
329        BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
330    }
331
332    /* Determines whether there is room to add another entry without rehashing.
333     * When this returns true, a subsequent add() operation is guaranteed to
334     * complete without performing a rehash.
335     */
336    inline bool hasMoreRoom() const {
337        return mCapacity > mFilledBuckets;
338    }
339
340protected:
341    static inline const TEntry& entryFor(const Bucket& bucket) {
342        return reinterpret_cast<const TEntry&>(bucket.entry);
343    }
344
345    static inline TEntry& entryFor(Bucket& bucket) {
346        return reinterpret_cast<TEntry&>(bucket.entry);
347    }
348
349    virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
350    virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
351    virtual void destroyBucketEntry(Bucket& bucket) const;
352
353private:
354    // For dumping the raw contents of a hashtable during testing.
355    friend class BasicHashtableTest;
356    inline uint32_t cookieAt(size_t index) const {
357        return bucketAt(mBuckets, index).cookie;
358    }
359};
360
361template <typename TKey, typename TEntry>
362BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
363        BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
364                minimumInitialCapacity, loadFactor) {
365}
366
367template <typename TKey, typename TEntry>
368BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
369        BasicHashtableImpl(other) {
370}
371
372template <typename TKey, typename TEntry>
373BasicHashtable<TKey, TEntry>::~BasicHashtable() {
374    dispose();
375}
376
377template <typename TKey, typename TEntry>
378bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
379        const void* __restrict__ key) const {
380    return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
381}
382
383template <typename TKey, typename TEntry>
384void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
385        const void* __restrict__ entry) const {
386    if (!traits<TEntry>::has_trivial_copy) {
387        new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
388    } else {
389        memcpy(&entryFor(bucket), entry, sizeof(TEntry));
390    }
391}
392
393template <typename TKey, typename TEntry>
394void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
395    if (!traits<TEntry>::has_trivial_dtor) {
396        entryFor(bucket).~TEntry();
397    }
398}
399
400}; // namespace android
401
402#endif // ANDROID_BASIC_HASHTABLE_H
403