1/*
2******************************************************************************
3*   Copyright (C) 1997-2009, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5******************************************************************************
6*   Date        Name        Description
7*   03/22/00    aliu        Adapted from original C++ ICU Hashtable.
8*   07/06/01    aliu        Modified to support int32_t keys on
9*                           platforms with sizeof(void*) < 32.
10******************************************************************************
11*/
12
13#include "uhash.h"
14#include "unicode/ustring.h"
15#include "cstring.h"
16#include "cmemory.h"
17#include "uassert.h"
18
19/* This hashtable is implemented as a double hash.  All elements are
20 * stored in a single array with no secondary storage for collision
21 * resolution (no linked list, etc.).  When there is a hash collision
22 * (when two unequal keys have the same hashcode) we resolve this by
23 * using a secondary hash.  The secondary hash is an increment
24 * computed as a hash function (a different one) of the primary
25 * hashcode.  This increment is added to the initial hash value to
26 * obtain further slots assigned to the same hash code.  For this to
27 * work, the length of the array and the increment must be relatively
28 * prime.  The easiest way to achieve this is to have the length of
29 * the array be prime, and the increment be any value from
30 * 1..length-1.
31 *
32 * Hashcodes are 32-bit integers.  We make sure all hashcodes are
33 * non-negative by masking off the top bit.  This has two effects: (1)
34 * modulo arithmetic is simplified.  If we allowed negative hashcodes,
35 * then when we computed hashcode % length, we could get a negative
36 * result, which we would then have to adjust back into range.  It's
37 * simpler to just make hashcodes non-negative. (2) It makes it easy
38 * to check for empty vs. occupied slots in the table.  We just mark
39 * empty or deleted slots with a negative hashcode.
40 *
41 * The central function is _uhash_find().  This function looks for a
42 * slot matching the given key and hashcode.  If one is found, it
43 * returns a pointer to that slot.  If the table is full, and no match
44 * is found, it returns NULL -- in theory.  This would make the code
45 * more complicated, since all callers of _uhash_find() would then
46 * have to check for a NULL result.  To keep this from happening, we
47 * don't allow the table to fill.  When there is only one
48 * empty/deleted slot left, uhash_put() will refuse to increase the
49 * count, and fail.  This simplifies the code.  In practice, one will
50 * seldom encounter this using default UHashtables.  However, if a
51 * hashtable is set to a U_FIXED resize policy, or if memory is
52 * exhausted, then the table may fill.
53 *
54 * High and low water ratios control rehashing.  They establish levels
55 * of fullness (from 0 to 1) outside of which the data array is
56 * reallocated and repopulated.  Setting the low water ratio to zero
57 * means the table will never shrink.  Setting the high water ratio to
58 * one means the table will never grow.  The ratios should be
59 * coordinated with the ratio between successive elements of the
60 * PRIMES table, so that when the primeIndex is incremented or
61 * decremented during rehashing, it brings the ratio of count / length
62 * back into the desired range (between low and high water ratios).
63 */
64
65/********************************************************************
66 * PRIVATE Constants, Macros
67 ********************************************************************/
68
69/* This is a list of non-consecutive primes chosen such that
70 * PRIMES[i+1] ~ 2*PRIMES[i].  (Currently, the ratio ranges from 1.81
71 * to 2.18; the inverse ratio ranges from 0.459 to 0.552.)  If this
72 * ratio is changed, the low and high water ratios should also be
73 * adjusted to suit.
74 *
75 * These prime numbers were also chosen so that they are the largest
76 * prime number while being less than a power of two.
77 */
78static const int32_t PRIMES[] = {
79    13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749,
80    65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593,
81    16777213, 33554393, 67108859, 134217689, 268435399, 536870909,
82    1073741789, 2147483647 /*, 4294967291 */
83};
84
85#define PRIMES_LENGTH (sizeof(PRIMES) / sizeof(PRIMES[0]))
86#define DEFAULT_PRIME_INDEX 3
87
88/* These ratios are tuned to the PRIMES array such that a resize
89 * places the table back into the zone of non-resizing.  That is,
90 * after a call to _uhash_rehash(), a subsequent call to
91 * _uhash_rehash() should do nothing (should not churn).  This is only
92 * a potential problem with U_GROW_AND_SHRINK.
93 */
94static const float RESIZE_POLICY_RATIO_TABLE[6] = {
95    /* low, high water ratio */
96    0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */
97    0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */
98    0.0F, 1.0F  /* U_FIXED: Never change size */
99};
100
101/*
102  Invariants for hashcode values:
103
104  * DELETED < 0
105  * EMPTY < 0
106  * Real hashes >= 0
107
108  Hashcodes may not start out this way, but internally they are
109  adjusted so that they are always positive.  We assume 32-bit
110  hashcodes; adjust these constants for other hashcode sizes.
111*/
112#define HASH_DELETED    ((int32_t) 0x80000000)
113#define HASH_EMPTY      ((int32_t) HASH_DELETED + 1)
114
115#define IS_EMPTY_OR_DELETED(x) ((x) < 0)
116
117/* This macro expects a UHashTok.pointer as its keypointer and
118   valuepointer parameters */
119#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) \
120            if (hash->keyDeleter != NULL && keypointer != NULL) { \
121                (*hash->keyDeleter)(keypointer); \
122            } \
123            if (hash->valueDeleter != NULL && valuepointer != NULL) { \
124                (*hash->valueDeleter)(valuepointer); \
125            }
126
127/*
128 * Constants for hinting whether a key or value is an integer
129 * or a pointer.  If a hint bit is zero, then the associated
130 * token is assumed to be an integer.
131 */
132#define HINT_KEY_POINTER   (1)
133#define HINT_VALUE_POINTER (2)
134
135/********************************************************************
136 * PRIVATE Implementation
137 ********************************************************************/
138
139static UHashTok
140_uhash_setElement(UHashtable *hash, UHashElement* e,
141                  int32_t hashcode,
142                  UHashTok key, UHashTok value, int8_t hint) {
143
144    UHashTok oldValue = e->value;
145    if (hash->keyDeleter != NULL && e->key.pointer != NULL &&
146        e->key.pointer != key.pointer) { /* Avoid double deletion */
147        (*hash->keyDeleter)(e->key.pointer);
148    }
149    if (hash->valueDeleter != NULL) {
150        if (oldValue.pointer != NULL &&
151            oldValue.pointer != value.pointer) { /* Avoid double deletion */
152            (*hash->valueDeleter)(oldValue.pointer);
153        }
154        oldValue.pointer = NULL;
155    }
156    /* Compilers should copy the UHashTok union correctly, but even if
157     * they do, memory heap tools (e.g. BoundsChecker) can get
158     * confused when a pointer is cloaked in a union and then copied.
159     * TO ALLEVIATE THIS, we use hints (based on what API the user is
160     * calling) to copy pointers when we know the user thinks
161     * something is a pointer. */
162    if (hint & HINT_KEY_POINTER) {
163        e->key.pointer = key.pointer;
164    } else {
165        e->key = key;
166    }
167    if (hint & HINT_VALUE_POINTER) {
168        e->value.pointer = value.pointer;
169    } else {
170        e->value = value;
171    }
172    e->hashcode = hashcode;
173    return oldValue;
174}
175
176/**
177 * Assumes that the given element is not empty or deleted.
178 */
179static UHashTok
180_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) {
181    UHashTok empty;
182    U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode));
183    --hash->count;
184    empty.pointer = NULL; empty.integer = 0;
185    return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0);
186}
187
188static void
189_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
190    U_ASSERT(hash != NULL);
191    U_ASSERT(((int32_t)policy) >= 0);
192    U_ASSERT(((int32_t)policy) < 3);
193    hash->lowWaterRatio  = RESIZE_POLICY_RATIO_TABLE[policy * 2];
194    hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1];
195}
196
197/**
198 * Allocate internal data array of a size determined by the given
199 * prime index.  If the index is out of range it is pinned into range.
200 * If the allocation fails the status is set to
201 * U_MEMORY_ALLOCATION_ERROR and all array storage is freed.  In
202 * either case the previous array pointer is overwritten.
203 *
204 * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1.
205 */
206static void
207_uhash_allocate(UHashtable *hash,
208                int32_t primeIndex,
209                UErrorCode *status) {
210
211    UHashElement *p, *limit;
212    UHashTok emptytok;
213
214    if (U_FAILURE(*status)) return;
215
216    U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH);
217
218    hash->primeIndex = primeIndex;
219    hash->length = PRIMES[primeIndex];
220
221    p = hash->elements = (UHashElement*)
222        uprv_malloc(sizeof(UHashElement) * hash->length);
223
224    if (hash->elements == NULL) {
225        *status = U_MEMORY_ALLOCATION_ERROR;
226        return;
227    }
228
229    emptytok.pointer = NULL; /* Only one of these two is needed */
230    emptytok.integer = 0;    /* but we don't know which one. */
231
232    limit = p + hash->length;
233    while (p < limit) {
234        p->key = emptytok;
235        p->value = emptytok;
236        p->hashcode = HASH_EMPTY;
237        ++p;
238    }
239
240    hash->count = 0;
241    hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio);
242    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
243}
244
245static UHashtable*
246_uhash_init(UHashtable *result,
247              UHashFunction *keyHash,
248              UKeyComparator *keyComp,
249              UValueComparator *valueComp,
250              int32_t primeIndex,
251              UErrorCode *status)
252{
253    if (U_FAILURE(*status)) return NULL;
254    U_ASSERT(keyHash != NULL);
255    U_ASSERT(keyComp != NULL);
256
257    result->keyHasher       = keyHash;
258    result->keyComparator   = keyComp;
259    result->valueComparator = valueComp;
260    result->keyDeleter      = NULL;
261    result->valueDeleter    = NULL;
262    result->allocated       = FALSE;
263    _uhash_internalSetResizePolicy(result, U_GROW);
264
265    _uhash_allocate(result, primeIndex, status);
266
267    if (U_FAILURE(*status)) {
268        return NULL;
269    }
270
271    return result;
272}
273
274static UHashtable*
275_uhash_create(UHashFunction *keyHash,
276              UKeyComparator *keyComp,
277              UValueComparator *valueComp,
278              int32_t primeIndex,
279              UErrorCode *status) {
280    UHashtable *result;
281
282    if (U_FAILURE(*status)) return NULL;
283
284    result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
285    if (result == NULL) {
286        *status = U_MEMORY_ALLOCATION_ERROR;
287        return NULL;
288    }
289
290    _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status);
291    result->allocated       = TRUE;
292
293    if (U_FAILURE(*status)) {
294        uprv_free(result);
295        return NULL;
296    }
297
298    return result;
299}
300
301/**
302 * Look for a key in the table, or if no such key exists, the first
303 * empty slot matching the given hashcode.  Keys are compared using
304 * the keyComparator function.
305 *
306 * First find the start position, which is the hashcode modulo
307 * the length.  Test it to see if it is:
308 *
309 * a. identical:  First check the hash values for a quick check,
310 *    then compare keys for equality using keyComparator.
311 * b. deleted
312 * c. empty
313 *
314 * Stop if it is identical or empty, otherwise continue by adding a
315 * "jump" value (moduloing by the length again to keep it within
316 * range) and retesting.  For efficiency, there need enough empty
317 * values so that the searchs stop within a reasonable amount of time.
318 * This can be changed by changing the high/low water marks.
319 *
320 * In theory, this function can return NULL, if it is full (no empty
321 * or deleted slots) and if no matching key is found.  In practice, we
322 * prevent this elsewhere (in uhash_put) by making sure the last slot
323 * in the table is never filled.
324 *
325 * The size of the table should be prime for this algorithm to work;
326 * otherwise we are not guaranteed that the jump value (the secondary
327 * hash) is relatively prime to the table length.
328 */
329static UHashElement*
330_uhash_find(const UHashtable *hash, UHashTok key,
331            int32_t hashcode) {
332
333    int32_t firstDeleted = -1;  /* assume invalid index */
334    int32_t theIndex, startIndex;
335    int32_t jump = 0; /* lazy evaluate */
336    int32_t tableHash;
337    UHashElement *elements = hash->elements;
338
339    hashcode &= 0x7FFFFFFF; /* must be positive */
340    startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length;
341
342    do {
343        tableHash = elements[theIndex].hashcode;
344        if (tableHash == hashcode) {          /* quick check */
345            if ((*hash->keyComparator)(key, elements[theIndex].key)) {
346                return &(elements[theIndex]);
347            }
348        } else if (!IS_EMPTY_OR_DELETED(tableHash)) {
349            /* We have hit a slot which contains a key-value pair,
350             * but for which the hash code does not match.  Keep
351             * looking.
352             */
353        } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */
354            break;
355        } else if (firstDeleted < 0) { /* remember first deleted */
356            firstDeleted = theIndex;
357        }
358        if (jump == 0) { /* lazy compute jump */
359            /* The jump value must be relatively prime to the table
360             * length.  As long as the length is prime, then any value
361             * 1..length-1 will be relatively prime to it.
362             */
363            jump = (hashcode % (hash->length - 1)) + 1;
364        }
365        theIndex = (theIndex + jump) % hash->length;
366    } while (theIndex != startIndex);
367
368    if (firstDeleted >= 0) {
369        theIndex = firstDeleted; /* reset if had deleted slot */
370    } else if (tableHash != HASH_EMPTY) {
371        /* We get to this point if the hashtable is full (no empty or
372         * deleted slots), and we've failed to find a match.  THIS
373         * WILL NEVER HAPPEN as long as uhash_put() makes sure that
374         * count is always < length.
375         */
376        U_ASSERT(FALSE);
377        return NULL; /* Never happens if uhash_put() behaves */
378    }
379    return &(elements[theIndex]);
380}
381
382/**
383 * Attempt to grow or shrink the data arrays in order to make the
384 * count fit between the high and low water marks.  hash_put() and
385 * hash_remove() call this method when the count exceeds the high or
386 * low water marks.  This method may do nothing, if memory allocation
387 * fails, or if the count is already in range, or if the length is
388 * already at the low or high limit.  In any case, upon return the
389 * arrays will be valid.
390 */
391static void
392_uhash_rehash(UHashtable *hash, UErrorCode *status) {
393
394    UHashElement *old = hash->elements;
395    int32_t oldLength = hash->length;
396    int32_t newPrimeIndex = hash->primeIndex;
397    int32_t i;
398
399    if (hash->count > hash->highWaterMark) {
400        if (++newPrimeIndex >= PRIMES_LENGTH) {
401            return;
402        }
403    } else if (hash->count < hash->lowWaterMark) {
404        if (--newPrimeIndex < 0) {
405            return;
406        }
407    } else {
408        return;
409    }
410
411    _uhash_allocate(hash, newPrimeIndex, status);
412
413    if (U_FAILURE(*status)) {
414        hash->elements = old;
415        hash->length = oldLength;
416        return;
417    }
418
419    for (i = oldLength - 1; i >= 0; --i) {
420        if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) {
421            UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode);
422            U_ASSERT(e != NULL);
423            U_ASSERT(e->hashcode == HASH_EMPTY);
424            e->key = old[i].key;
425            e->value = old[i].value;
426            e->hashcode = old[i].hashcode;
427            ++hash->count;
428        }
429    }
430
431    uprv_free(old);
432}
433
434static UHashTok
435_uhash_remove(UHashtable *hash,
436              UHashTok key) {
437    /* First find the position of the key in the table.  If the object
438     * has not been removed already, remove it.  If the user wanted
439     * keys deleted, then delete it also.  We have to put a special
440     * hashcode in that position that means that something has been
441     * deleted, since when we do a find, we have to continue PAST any
442     * deleted values.
443     */
444    UHashTok result;
445    UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key));
446    U_ASSERT(e != NULL);
447    result.pointer = NULL;
448    result.integer = 0;
449    if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
450        result = _uhash_internalRemoveElement(hash, e);
451        if (hash->count < hash->lowWaterMark) {
452            UErrorCode status = U_ZERO_ERROR;
453            _uhash_rehash(hash, &status);
454        }
455    }
456    return result;
457}
458
459static UHashTok
460_uhash_put(UHashtable *hash,
461           UHashTok key,
462           UHashTok value,
463           int8_t hint,
464           UErrorCode *status) {
465
466    /* Put finds the position in the table for the new value.  If the
467     * key is already in the table, it is deleted, if there is a
468     * non-NULL keyDeleter.  Then the key, the hash and the value are
469     * all put at the position in their respective arrays.
470     */
471    int32_t hashcode;
472    UHashElement* e;
473    UHashTok emptytok;
474
475    if (U_FAILURE(*status)) {
476        goto err;
477    }
478    U_ASSERT(hash != NULL);
479    /* Cannot always check pointer here or iSeries sees NULL every time. */
480    if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
481        /* Disallow storage of NULL values, since NULL is returned by
482         * get() to indicate an absent key.  Storing NULL == removing.
483         */
484        return _uhash_remove(hash, key);
485    }
486    if (hash->count > hash->highWaterMark) {
487        _uhash_rehash(hash, status);
488        if (U_FAILURE(*status)) {
489            goto err;
490        }
491    }
492
493    hashcode = (*hash->keyHasher)(key);
494    e = _uhash_find(hash, key, hashcode);
495    U_ASSERT(e != NULL);
496
497    if (IS_EMPTY_OR_DELETED(e->hashcode)) {
498        /* Important: We must never actually fill the table up.  If we
499         * do so, then _uhash_find() will return NULL, and we'll have
500         * to check for NULL after every call to _uhash_find().  To
501         * avoid this we make sure there is always at least one empty
502         * or deleted slot in the table.  This only is a problem if we
503         * are out of memory and rehash isn't working.
504         */
505        ++hash->count;
506        if (hash->count == hash->length) {
507            /* Don't allow count to reach length */
508            --hash->count;
509            *status = U_MEMORY_ALLOCATION_ERROR;
510            goto err;
511        }
512    }
513
514    /* We must in all cases handle storage properly.  If there was an
515     * old key, then it must be deleted (if the deleter != NULL).
516     * Make hashcodes stored in table positive.
517     */
518    return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint);
519
520 err:
521    /* If the deleters are non-NULL, this method adopts its key and/or
522     * value arguments, and we must be sure to delete the key and/or
523     * value in all cases, even upon failure.
524     */
525    HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer);
526    emptytok.pointer = NULL; emptytok.integer = 0;
527    return emptytok;
528}
529
530
531/********************************************************************
532 * PUBLIC API
533 ********************************************************************/
534
535U_CAPI UHashtable* U_EXPORT2
536uhash_open(UHashFunction *keyHash,
537           UKeyComparator *keyComp,
538           UValueComparator *valueComp,
539           UErrorCode *status) {
540
541    return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
542}
543
544U_CAPI UHashtable* U_EXPORT2
545uhash_openSize(UHashFunction *keyHash,
546               UKeyComparator *keyComp,
547               UValueComparator *valueComp,
548               int32_t size,
549               UErrorCode *status) {
550
551    /* Find the smallest index i for which PRIMES[i] >= size. */
552    int32_t i = 0;
553    while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) {
554        ++i;
555    }
556
557    return _uhash_create(keyHash, keyComp, valueComp, i, status);
558}
559
560U_CAPI UHashtable* U_EXPORT2
561uhash_init(UHashtable *fillinResult,
562           UHashFunction *keyHash,
563           UKeyComparator *keyComp,
564           UValueComparator *valueComp,
565           UErrorCode *status) {
566
567    return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status);
568}
569
570U_CAPI void U_EXPORT2
571uhash_close(UHashtable *hash) {
572    if (hash == NULL) {
573        return;
574    }
575    if (hash->elements != NULL) {
576        if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) {
577            int32_t pos=-1;
578            UHashElement *e;
579            while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) {
580                HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer);
581            }
582        }
583        uprv_free(hash->elements);
584        hash->elements = NULL;
585    }
586    if (hash->allocated) {
587        uprv_free(hash);
588    }
589}
590
591U_CAPI UHashFunction *U_EXPORT2
592uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) {
593    UHashFunction *result = hash->keyHasher;
594    hash->keyHasher = fn;
595    return result;
596}
597
598U_CAPI UKeyComparator *U_EXPORT2
599uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) {
600    UKeyComparator *result = hash->keyComparator;
601    hash->keyComparator = fn;
602    return result;
603}
604U_CAPI UValueComparator *U_EXPORT2
605uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){
606    UValueComparator *result = hash->valueComparator;
607    hash->valueComparator = fn;
608    return result;
609}
610
611U_CAPI UObjectDeleter *U_EXPORT2
612uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) {
613    UObjectDeleter *result = hash->keyDeleter;
614    hash->keyDeleter = fn;
615    return result;
616}
617
618U_CAPI UObjectDeleter *U_EXPORT2
619uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) {
620    UObjectDeleter *result = hash->valueDeleter;
621    hash->valueDeleter = fn;
622    return result;
623}
624
625U_CAPI void U_EXPORT2
626uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) {
627    UErrorCode status = U_ZERO_ERROR;
628    _uhash_internalSetResizePolicy(hash, policy);
629    hash->lowWaterMark  = (int32_t)(hash->length * hash->lowWaterRatio);
630    hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio);
631    _uhash_rehash(hash, &status);
632}
633
634U_CAPI int32_t U_EXPORT2
635uhash_count(const UHashtable *hash) {
636    return hash->count;
637}
638
639U_CAPI void* U_EXPORT2
640uhash_get(const UHashtable *hash,
641          const void* key) {
642    UHashTok keyholder;
643    keyholder.pointer = (void*) key;
644    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
645}
646
647U_CAPI void* U_EXPORT2
648uhash_iget(const UHashtable *hash,
649           int32_t key) {
650    UHashTok keyholder;
651    keyholder.integer = key;
652    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer;
653}
654
655U_CAPI int32_t U_EXPORT2
656uhash_geti(const UHashtable *hash,
657           const void* key) {
658    UHashTok keyholder;
659    keyholder.pointer = (void*) key;
660    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
661}
662
663U_CAPI int32_t U_EXPORT2
664uhash_igeti(const UHashtable *hash,
665           int32_t key) {
666    UHashTok keyholder;
667    keyholder.integer = key;
668    return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
669}
670
671U_CAPI void* U_EXPORT2
672uhash_put(UHashtable *hash,
673          void* key,
674          void* value,
675          UErrorCode *status) {
676    UHashTok keyholder, valueholder;
677    keyholder.pointer = key;
678    valueholder.pointer = value;
679    return _uhash_put(hash, keyholder, valueholder,
680                      HINT_KEY_POINTER | HINT_VALUE_POINTER,
681                      status).pointer;
682}
683
684U_CAPI void* U_EXPORT2
685uhash_iput(UHashtable *hash,
686           int32_t key,
687           void* value,
688           UErrorCode *status) {
689    UHashTok keyholder, valueholder;
690    keyholder.integer = key;
691    valueholder.pointer = value;
692    return _uhash_put(hash, keyholder, valueholder,
693                      HINT_VALUE_POINTER,
694                      status).pointer;
695}
696
697U_CAPI int32_t U_EXPORT2
698uhash_puti(UHashtable *hash,
699           void* key,
700           int32_t value,
701           UErrorCode *status) {
702    UHashTok keyholder, valueholder;
703    keyholder.pointer = key;
704    valueholder.integer = value;
705    return _uhash_put(hash, keyholder, valueholder,
706                      HINT_KEY_POINTER,
707                      status).integer;
708}
709
710
711U_CAPI int32_t U_EXPORT2
712uhash_iputi(UHashtable *hash,
713           int32_t key,
714           int32_t value,
715           UErrorCode *status) {
716    UHashTok keyholder, valueholder;
717    keyholder.integer = key;
718    valueholder.integer = value;
719    return _uhash_put(hash, keyholder, valueholder,
720                      0, /* neither is a ptr */
721                      status).integer;
722}
723
724U_CAPI void* U_EXPORT2
725uhash_remove(UHashtable *hash,
726             const void* key) {
727    UHashTok keyholder;
728    keyholder.pointer = (void*) key;
729    return _uhash_remove(hash, keyholder).pointer;
730}
731
732U_CAPI void* U_EXPORT2
733uhash_iremove(UHashtable *hash,
734              int32_t key) {
735    UHashTok keyholder;
736    keyholder.integer = key;
737    return _uhash_remove(hash, keyholder).pointer;
738}
739
740U_CAPI int32_t U_EXPORT2
741uhash_removei(UHashtable *hash,
742              const void* key) {
743    UHashTok keyholder;
744    keyholder.pointer = (void*) key;
745    return _uhash_remove(hash, keyholder).integer;
746}
747
748U_CAPI int32_t U_EXPORT2
749uhash_iremovei(UHashtable *hash,
750               int32_t key) {
751    UHashTok keyholder;
752    keyholder.integer = key;
753    return _uhash_remove(hash, keyholder).integer;
754}
755
756U_CAPI void U_EXPORT2
757uhash_removeAll(UHashtable *hash) {
758    int32_t pos = -1;
759    const UHashElement *e;
760    U_ASSERT(hash != NULL);
761    if (hash->count != 0) {
762        while ((e = uhash_nextElement(hash, &pos)) != NULL) {
763            uhash_removeElement(hash, e);
764        }
765    }
766    U_ASSERT(hash->count == 0);
767}
768
769U_CAPI const UHashElement* U_EXPORT2
770uhash_find(const UHashtable *hash, const void* key) {
771    UHashTok keyholder;
772    const UHashElement *e;
773    keyholder.pointer = (void*) key;
774    e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
775    return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e;
776}
777
778U_CAPI const UHashElement* U_EXPORT2
779uhash_nextElement(const UHashtable *hash, int32_t *pos) {
780    /* Walk through the array until we find an element that is not
781     * EMPTY and not DELETED.
782     */
783    int32_t i;
784    U_ASSERT(hash != NULL);
785    for (i = *pos + 1; i < hash->length; ++i) {
786        if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) {
787            *pos = i;
788            return &(hash->elements[i]);
789        }
790    }
791
792    /* No more elements */
793    return NULL;
794}
795
796U_CAPI void* U_EXPORT2
797uhash_removeElement(UHashtable *hash, const UHashElement* e) {
798    U_ASSERT(hash != NULL);
799    U_ASSERT(e != NULL);
800    if (!IS_EMPTY_OR_DELETED(e->hashcode)) {
801        UHashElement *nce = (UHashElement *)e;
802        return _uhash_internalRemoveElement(hash, nce).pointer;
803    }
804    return NULL;
805}
806
807/********************************************************************
808 * UHashTok convenience
809 ********************************************************************/
810
811/**
812 * Return a UHashTok for an integer.
813 */
814/*U_CAPI UHashTok U_EXPORT2
815uhash_toki(int32_t i) {
816    UHashTok tok;
817    tok.integer = i;
818    return tok;
819}*/
820
821/**
822 * Return a UHashTok for a pointer.
823 */
824/*U_CAPI UHashTok U_EXPORT2
825uhash_tokp(void* p) {
826    UHashTok tok;
827    tok.pointer = p;
828    return tok;
829}*/
830
831/********************************************************************
832 * PUBLIC Key Hash Functions
833 ********************************************************************/
834
835/*
836  Compute the hash by iterating sparsely over about 32 (up to 63)
837  characters spaced evenly through the string.  For each character,
838  multiply the previous hash value by a prime number and add the new
839  character in, like a linear congruential random number generator,
840  producing a pseudorandom deterministic value well distributed over
841  the output range. [LIU]
842*/
843
844#define STRING_HASH(TYPE, STR, STRLEN, DEREF) \
845    int32_t hash = 0;                         \
846    const TYPE *p = (const TYPE*) STR;        \
847    if (p != NULL) {                          \
848        int32_t len = (int32_t)(STRLEN);      \
849        int32_t inc = ((len - 32) / 32) + 1;  \
850        const TYPE *limit = p + len;          \
851        while (p<limit) {                     \
852            hash = (hash * 37) + DEREF;       \
853            p += inc;                         \
854        }                                     \
855    }                                         \
856    return hash
857
858U_CAPI int32_t U_EXPORT2
859uhash_hashUChars(const UHashTok key) {
860    STRING_HASH(UChar, key.pointer, u_strlen(p), *p);
861}
862
863/* Used by UnicodeString to compute its hashcode - Not public API. */
864U_CAPI int32_t U_EXPORT2
865uhash_hashUCharsN(const UChar *str, int32_t length) {
866    STRING_HASH(UChar, str, length, *p);
867}
868
869U_CAPI int32_t U_EXPORT2
870uhash_hashChars(const UHashTok key) {
871    STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), *p);
872}
873
874U_CAPI int32_t U_EXPORT2
875uhash_hashIChars(const UHashTok key) {
876    STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), uprv_tolower(*p));
877}
878
879U_CAPI UBool U_EXPORT2
880uhash_equals(const UHashtable* hash1, const UHashtable* hash2){
881
882    int32_t count1, count2, pos, i;
883
884    if(hash1==hash2){
885        return TRUE;
886    }
887
888    /*
889     * Make sure that we are comparing 2 valid hashes of the same type
890     * with valid comparison functions.
891     * Without valid comparison functions, a binary comparison
892     * of the hash values will yield random results on machines
893     * with 64-bit pointers and 32-bit integer hashes.
894     * A valueComparator is normally optional.
895     */
896    if (hash1==NULL || hash2==NULL ||
897        hash1->keyComparator != hash2->keyComparator ||
898        hash1->valueComparator != hash2->valueComparator ||
899        hash1->valueComparator == NULL)
900    {
901        /*
902        Normally we would return an error here about incompatible hash tables,
903        but we return FALSE instead.
904        */
905        return FALSE;
906    }
907
908    count1 = uhash_count(hash1);
909    count2 = uhash_count(hash2);
910    if(count1!=count2){
911        return FALSE;
912    }
913
914    pos=-1;
915    for(i=0; i<count1; i++){
916        const UHashElement* elem1 = uhash_nextElement(hash1, &pos);
917        const UHashTok key1 = elem1->key;
918        const UHashTok val1 = elem1->value;
919        /* here the keys are not compared, instead the key form hash1 is used to fetch
920         * value from hash2. If the hashes are equal then then both hashes should
921         * contain equal values for the same key!
922         */
923        const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1));
924        const UHashTok val2 = elem2->value;
925        if(hash1->valueComparator(val1, val2)==FALSE){
926            return FALSE;
927        }
928    }
929    return TRUE;
930}
931
932/********************************************************************
933 * PUBLIC Comparator Functions
934 ********************************************************************/
935
936U_CAPI UBool U_EXPORT2
937uhash_compareUChars(const UHashTok key1, const UHashTok key2) {
938    const UChar *p1 = (const UChar*) key1.pointer;
939    const UChar *p2 = (const UChar*) key2.pointer;
940    if (p1 == p2) {
941        return TRUE;
942    }
943    if (p1 == NULL || p2 == NULL) {
944        return FALSE;
945    }
946    while (*p1 != 0 && *p1 == *p2) {
947        ++p1;
948        ++p2;
949    }
950    return (UBool)(*p1 == *p2);
951}
952
953U_CAPI UBool U_EXPORT2
954uhash_compareChars(const UHashTok key1, const UHashTok key2) {
955    const char *p1 = (const char*) key1.pointer;
956    const char *p2 = (const char*) key2.pointer;
957    if (p1 == p2) {
958        return TRUE;
959    }
960    if (p1 == NULL || p2 == NULL) {
961        return FALSE;
962    }
963    while (*p1 != 0 && *p1 == *p2) {
964        ++p1;
965        ++p2;
966    }
967    return (UBool)(*p1 == *p2);
968}
969
970U_CAPI UBool U_EXPORT2
971uhash_compareIChars(const UHashTok key1, const UHashTok key2) {
972    const char *p1 = (const char*) key1.pointer;
973    const char *p2 = (const char*) key2.pointer;
974    if (p1 == p2) {
975        return TRUE;
976    }
977    if (p1 == NULL || p2 == NULL) {
978        return FALSE;
979    }
980    while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) {
981        ++p1;
982        ++p2;
983    }
984    return (UBool)(*p1 == *p2);
985}
986
987/********************************************************************
988 * PUBLIC int32_t Support Functions
989 ********************************************************************/
990
991U_CAPI int32_t U_EXPORT2
992uhash_hashLong(const UHashTok key) {
993    return key.integer;
994}
995
996U_CAPI UBool U_EXPORT2
997uhash_compareLong(const UHashTok key1, const UHashTok key2) {
998    return (UBool)(key1.integer == key2.integer);
999}
1000
1001/********************************************************************
1002 * PUBLIC Deleter Functions
1003 ********************************************************************/
1004
1005U_CAPI void U_EXPORT2
1006uhash_freeBlock(void *obj) {
1007    uprv_free(obj);
1008}
1009
1010