1#include <stdlib.h> 2#include <inttypes.h> 3 4#include "bloom.h" 5#include "../hash.h" 6#include "../minmax.h" 7#include "../crc/xxhash.h" 8#include "../crc/murmur3.h" 9#include "../crc/crc32c.h" 10#include "../crc/fnv.h" 11 12struct bloom { 13 uint64_t nentries; 14 15 uint32_t *map; 16}; 17 18#define BITS_PER_INDEX (sizeof(uint32_t) * 8) 19#define BITS_INDEX_MASK (BITS_PER_INDEX - 1) 20 21struct bloom_hash { 22 unsigned int seed; 23 uint32_t (*fn)(const void *, uint32_t, uint32_t); 24}; 25 26static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed) 27{ 28 return fio_crc32c(buf, len); 29} 30 31static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed) 32{ 33 return fnv(buf, len, seed); 34} 35 36#define BLOOM_SEED 0x8989 37 38struct bloom_hash hashes[] = { 39 { 40 .seed = BLOOM_SEED, 41 .fn = jhash, 42 }, 43 { 44 .seed = BLOOM_SEED, 45 .fn = XXH32, 46 }, 47 { 48 .seed = BLOOM_SEED, 49 .fn = murmurhash3, 50 }, 51 { 52 .seed = BLOOM_SEED, 53 .fn = bloom_crc32c, 54 }, 55 { 56 .seed = BLOOM_SEED, 57 .fn = bloom_fnv, 58 }, 59}; 60 61#define N_HASHES 5 62 63#define MIN_ENTRIES 1073741824UL 64 65struct bloom *bloom_new(uint64_t entries) 66{ 67 struct bloom *b; 68 size_t no_uints; 69 70 crc32c_intel_probe(); 71 72 b = malloc(sizeof(*b)); 73 b->nentries = entries; 74 no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX; 75 no_uints = max((unsigned long) no_uints, MIN_ENTRIES); 76 b->map = calloc(no_uints, sizeof(uint32_t)); 77 if (!b->map) { 78 free(b); 79 return NULL; 80 } 81 82 return b; 83} 84 85void bloom_free(struct bloom *b) 86{ 87 free(b->map); 88 free(b); 89} 90 91static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords, 92 int set) 93{ 94 uint32_t hash[N_HASHES]; 95 int i, was_set; 96 97 for (i = 0; i < N_HASHES; i++) { 98 hash[i] = hashes[i].fn(data, nwords, hashes[i].seed); 99 hash[i] = hash[i] % b->nentries; 100 } 101 102 was_set = 0; 103 for (i = 0; i < N_HASHES; i++) { 104 const unsigned int index = hash[i] / BITS_PER_INDEX; 105 const unsigned int bit = hash[i] & BITS_INDEX_MASK; 106 107 if (b->map[index] & (1U << bit)) 108 was_set++; 109 if (set) 110 b->map[index] |= 1U << bit; 111 } 112 113 return was_set == N_HASHES; 114} 115 116int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords) 117{ 118 return __bloom_check(b, data, nwords, 1); 119} 120