Hash.cpp revision b74e7190e86d559712747e5cdb31a0d390b7af7d
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/*
17 * Hash table.  The dominant calls are add and lookup, with removals
18 * happening very infrequently.  We use probing, and don't worry much
19 * about tombstone removal.
20 */
21#include "Dalvik.h"
22
23#include <stdlib.h>
24
25/* table load factor, i.e. how full can it get before we resize */
26//#define LOAD_NUMER  3       // 75%
27//#define LOAD_DENOM  4
28#define LOAD_NUMER  5       // 62.5%
29#define LOAD_DENOM  8
30//#define LOAD_NUMER  1       // 50%
31//#define LOAD_DENOM  2
32
33/*
34 * Compute the capacity needed for a table to hold "size" elements.
35 */
36size_t dvmHashSize(size_t size) {
37    return (size * LOAD_DENOM) / LOAD_NUMER +1;
38}
39
40
41/*
42 * Create and initialize a hash table.
43 */
44HashTable* dvmHashTableCreate(size_t initialSize, HashFreeFunc freeFunc)
45{
46    HashTable* pHashTable;
47
48    assert(initialSize > 0);
49
50    pHashTable = (HashTable*) malloc(sizeof(*pHashTable));
51    if (pHashTable == NULL)
52        return NULL;
53
54    dvmInitMutex(&pHashTable->lock);
55
56    pHashTable->tableSize = dexRoundUpPower2(initialSize);
57    pHashTable->numEntries = pHashTable->numDeadEntries = 0;
58    pHashTable->freeFunc = freeFunc;
59    pHashTable->pEntries =
60        (HashEntry*) calloc(pHashTable->tableSize, sizeof(HashEntry));
61    if (pHashTable->pEntries == NULL) {
62        free(pHashTable);
63        return NULL;
64    }
65
66    return pHashTable;
67}
68
69/*
70 * Clear out all entries.
71 */
72void dvmHashTableClear(HashTable* pHashTable)
73{
74    HashEntry* pEnt;
75    int i;
76
77    pEnt = pHashTable->pEntries;
78    for (i = 0; i < pHashTable->tableSize; i++, pEnt++) {
79        if (pEnt->data == HASH_TOMBSTONE) {
80            // nuke entry
81            pEnt->data = NULL;
82        } else if (pEnt->data != NULL) {
83            // call free func then nuke entry
84            if (pHashTable->freeFunc != NULL)
85                (*pHashTable->freeFunc)(pEnt->data);
86            pEnt->data = NULL;
87        }
88    }
89
90    pHashTable->numEntries = 0;
91    pHashTable->numDeadEntries = 0;
92}
93
94/*
95 * Free the table.
96 */
97void dvmHashTableFree(HashTable* pHashTable)
98{
99    if (pHashTable == NULL)
100        return;
101    dvmHashTableClear(pHashTable);
102    free(pHashTable->pEntries);
103    free(pHashTable);
104}
105
106#ifndef NDEBUG
107/*
108 * Count up the number of tombstone entries in the hash table.
109 */
110static int countTombStones(HashTable* pHashTable)
111{
112    int i, count;
113
114    for (count = i = 0; i < pHashTable->tableSize; i++) {
115        if (pHashTable->pEntries[i].data == HASH_TOMBSTONE)
116            count++;
117    }
118    return count;
119}
120#endif
121
122/*
123 * Resize a hash table.  We do this when adding an entry increased the
124 * size of the table beyond its comfy limit.
125 *
126 * This essentially requires re-inserting all elements into the new storage.
127 *
128 * If multiple threads can access the hash table, the table's lock should
129 * have been grabbed before issuing the "lookup+add" call that led to the
130 * resize, so we don't have a synchronization problem here.
131 */
132static bool resizeHash(HashTable* pHashTable, int newSize)
133{
134    HashEntry* pNewEntries;
135    int i;
136
137    assert(countTombStones(pHashTable) == pHashTable->numDeadEntries);
138    //ALOGI("before: dead=%d", pHashTable->numDeadEntries);
139
140    pNewEntries = (HashEntry*) calloc(newSize, sizeof(HashEntry));
141    if (pNewEntries == NULL)
142        return false;
143
144    for (i = 0; i < pHashTable->tableSize; i++) {
145        void* data = pHashTable->pEntries[i].data;
146        if (data != NULL && data != HASH_TOMBSTONE) {
147            int hashValue = pHashTable->pEntries[i].hashValue;
148            int newIdx;
149
150            /* probe for new spot, wrapping around */
151            newIdx = hashValue & (newSize-1);
152            while (pNewEntries[newIdx].data != NULL)
153                newIdx = (newIdx + 1) & (newSize-1);
154
155            pNewEntries[newIdx].hashValue = hashValue;
156            pNewEntries[newIdx].data = data;
157        }
158    }
159
160    free(pHashTable->pEntries);
161    pHashTable->pEntries = pNewEntries;
162    pHashTable->tableSize = newSize;
163    pHashTable->numDeadEntries = 0;
164
165    assert(countTombStones(pHashTable) == 0);
166    return true;
167}
168
169/*
170 * Look up an entry.
171 *
172 * We probe on collisions, wrapping around the table.
173 */
174void* dvmHashTableLookup(HashTable* pHashTable, u4 itemHash, void* item,
175    HashCompareFunc cmpFunc, bool doAdd)
176{
177    HashEntry* pEntry;
178    HashEntry* pEnd;
179    void* result = NULL;
180
181    assert(pHashTable->tableSize > 0);
182    assert(item != HASH_TOMBSTONE);
183    assert(item != NULL);
184
185    /* jump to the first entry and probe for a match */
186    pEntry = &pHashTable->pEntries[itemHash & (pHashTable->tableSize-1)];
187    pEnd = &pHashTable->pEntries[pHashTable->tableSize];
188    while (pEntry->data != NULL) {
189        if (pEntry->data != HASH_TOMBSTONE &&
190            pEntry->hashValue == itemHash &&
191            (*cmpFunc)(pEntry->data, item) == 0)
192        {
193            /* match */
194            //ALOGD("+++ match on entry %d", pEntry - pHashTable->pEntries);
195            break;
196        }
197
198        pEntry++;
199        if (pEntry == pEnd) {     /* wrap around to start */
200            if (pHashTable->tableSize == 1)
201                break;      /* edge case - single-entry table */
202            pEntry = pHashTable->pEntries;
203        }
204
205        //ALOGI("+++ look probing %d...", pEntry - pHashTable->pEntries);
206    }
207
208    if (pEntry->data == NULL) {
209        if (doAdd) {
210            pEntry->hashValue = itemHash;
211            pEntry->data = item;
212            pHashTable->numEntries++;
213
214            /*
215             * We've added an entry.  See if this brings us too close to full.
216             */
217            if ((pHashTable->numEntries+pHashTable->numDeadEntries) * LOAD_DENOM
218                > pHashTable->tableSize * LOAD_NUMER)
219            {
220                if (!resizeHash(pHashTable, pHashTable->tableSize * 2)) {
221                    /* don't really have a way to indicate failure */
222                    ALOGE("Dalvik hash resize failure");
223                    dvmAbort();
224                }
225                /* note "pEntry" is now invalid */
226            } else {
227                //ALOGW("okay %d/%d/%d",
228                //    pHashTable->numEntries, pHashTable->tableSize,
229                //    (pHashTable->tableSize * LOAD_NUMER) / LOAD_DENOM);
230            }
231
232            /* full table is bad -- search for nonexistent never halts */
233            assert(pHashTable->numEntries < pHashTable->tableSize);
234            result = item;
235        } else {
236            assert(result == NULL);
237        }
238    } else {
239        result = pEntry->data;
240    }
241
242    return result;
243}
244
245/*
246 * Remove an entry from the table.
247 *
248 * Does NOT invoke the "free" function on the item.
249 */
250bool dvmHashTableRemove(HashTable* pHashTable, u4 itemHash, void* item)
251{
252    HashEntry* pEntry;
253    HashEntry* pEnd;
254
255    assert(pHashTable->tableSize > 0);
256
257    /* jump to the first entry and probe for a match */
258    pEntry = &pHashTable->pEntries[itemHash & (pHashTable->tableSize-1)];
259    pEnd = &pHashTable->pEntries[pHashTable->tableSize];
260    while (pEntry->data != NULL) {
261        if (pEntry->data == item) {
262            //ALOGI("+++ stepping on entry %d", pEntry - pHashTable->pEntries);
263            pEntry->data = HASH_TOMBSTONE;
264            pHashTable->numEntries--;
265            pHashTable->numDeadEntries++;
266            return true;
267        }
268
269        pEntry++;
270        if (pEntry == pEnd) {     /* wrap around to start */
271            if (pHashTable->tableSize == 1)
272                break;      /* edge case - single-entry table */
273            pEntry = pHashTable->pEntries;
274        }
275
276        //ALOGI("+++ del probing %d...", pEntry - pHashTable->pEntries);
277    }
278
279    return false;
280}
281
282/*
283 * Scan every entry in the hash table and evaluate it with the specified
284 * indirect function call. If the function returns 1, remove the entry from
285 * the table.
286 *
287 * Does NOT invoke the "free" function on the item.
288 *
289 * Returning values other than 0 or 1 will abort the routine.
290 */
291int dvmHashForeachRemove(HashTable* pHashTable, HashForeachRemoveFunc func)
292{
293    int i, val;
294
295    for (i = 0; i < pHashTable->tableSize; i++) {
296        HashEntry* pEnt = &pHashTable->pEntries[i];
297
298        if (pEnt->data != NULL && pEnt->data != HASH_TOMBSTONE) {
299            val = (*func)(pEnt->data);
300            if (val == 1) {
301                pEnt->data = HASH_TOMBSTONE;
302                pHashTable->numEntries--;
303                pHashTable->numDeadEntries++;
304            }
305            else if (val != 0) {
306                return val;
307            }
308        }
309    }
310    return 0;
311}
312
313
314/*
315 * Execute a function on every entry in the hash table.
316 *
317 * If "func" returns a nonzero value, terminate early and return the value.
318 */
319int dvmHashForeach(HashTable* pHashTable, HashForeachFunc func, void* arg)
320{
321    int i, val;
322
323    for (i = 0; i < pHashTable->tableSize; i++) {
324        HashEntry* pEnt = &pHashTable->pEntries[i];
325
326        if (pEnt->data != NULL && pEnt->data != HASH_TOMBSTONE) {
327            val = (*func)(pEnt->data, arg);
328            if (val != 0)
329                return val;
330        }
331    }
332
333    return 0;
334}
335
336
337/*
338 * Look up an entry, counting the number of times we have to probe.
339 *
340 * Returns -1 if the entry wasn't found.
341 */
342static int countProbes(HashTable* pHashTable, u4 itemHash, const void* item,
343    HashCompareFunc cmpFunc)
344{
345    HashEntry* pEntry;
346    HashEntry* pEnd;
347    int count = 0;
348
349    assert(pHashTable->tableSize > 0);
350    assert(item != HASH_TOMBSTONE);
351    assert(item != NULL);
352
353    /* jump to the first entry and probe for a match */
354    pEntry = &pHashTable->pEntries[itemHash & (pHashTable->tableSize-1)];
355    pEnd = &pHashTable->pEntries[pHashTable->tableSize];
356    while (pEntry->data != NULL) {
357        if (pEntry->data != HASH_TOMBSTONE &&
358            pEntry->hashValue == itemHash &&
359            (*cmpFunc)(pEntry->data, item) == 0)
360        {
361            /* match */
362            break;
363        }
364
365        pEntry++;
366        if (pEntry == pEnd) {     /* wrap around to start */
367            if (pHashTable->tableSize == 1)
368                break;      /* edge case - single-entry table */
369            pEntry = pHashTable->pEntries;
370        }
371
372        count++;
373    }
374    if (pEntry->data == NULL)
375        return -1;
376
377    return count;
378}
379
380/*
381 * Evaluate the amount of probing required for the specified hash table.
382 *
383 * We do this by running through all entries in the hash table, computing
384 * the hash value and then doing a lookup.
385 *
386 * The caller should lock the table before calling here.
387 */
388void dvmHashTableProbeCount(HashTable* pHashTable, HashCalcFunc calcFunc,
389    HashCompareFunc cmpFunc)
390{
391    int numEntries, minProbe, maxProbe, totalProbe;
392    HashIter iter;
393
394    numEntries = maxProbe = totalProbe = 0;
395    minProbe = 65536*32767;
396
397    for (dvmHashIterBegin(pHashTable, &iter); !dvmHashIterDone(&iter);
398        dvmHashIterNext(&iter))
399    {
400        const void* data = (const void*)dvmHashIterData(&iter);
401        int count;
402
403        count = countProbes(pHashTable, (*calcFunc)(data), data, cmpFunc);
404
405        numEntries++;
406
407        if (count < minProbe)
408            minProbe = count;
409        if (count > maxProbe)
410            maxProbe = count;
411        totalProbe += count;
412    }
413
414    ALOGI("Probe: min=%d max=%d, total=%d in %d (%d), avg=%.3f",
415        minProbe, maxProbe, totalProbe, numEntries, pHashTable->tableSize,
416        (float) totalProbe / (float) numEntries);
417}
418