1#include <stdlib.h>
2#include <inttypes.h>
3
4#include "bloom.h"
5#include "../hash.h"
6#include "../minmax.h"
7#include "../crc/xxhash.h"
8#include "../crc/murmur3.h"
9#include "../crc/crc32c.h"
10#include "../crc/fnv.h"
11
12struct bloom {
13	uint64_t nentries;
14
15	uint32_t *map;
16};
17
18#define BITS_PER_INDEX	(sizeof(uint32_t) * 8)
19#define BITS_INDEX_MASK	(BITS_PER_INDEX - 1)
20
21struct bloom_hash {
22	unsigned int seed;
23	uint32_t (*fn)(const void *, uint32_t, uint32_t);
24};
25
26static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
27{
28	return fio_crc32c(buf, len);
29}
30
31static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
32{
33	return fnv(buf, len, seed);
34}
35
36#define BLOOM_SEED	0x8989
37
38struct bloom_hash hashes[] = {
39	{
40		.seed = BLOOM_SEED,
41		.fn = jhash,
42	},
43	{
44		.seed = BLOOM_SEED,
45		.fn = XXH32,
46	},
47	{
48		.seed = BLOOM_SEED,
49		.fn = murmurhash3,
50	},
51	{
52		.seed = BLOOM_SEED,
53		.fn = bloom_crc32c,
54	},
55	{
56		.seed = BLOOM_SEED,
57		.fn = bloom_fnv,
58	},
59};
60
61#define N_HASHES	5
62
63#define MIN_ENTRIES	1073741824UL
64
65struct bloom *bloom_new(uint64_t entries)
66{
67	struct bloom *b;
68	size_t no_uints;
69
70	crc32c_intel_probe();
71
72	b = malloc(sizeof(*b));
73	b->nentries = entries;
74	no_uints = (entries + BITS_PER_INDEX - 1) / BITS_PER_INDEX;
75	no_uints = max((unsigned long) no_uints, MIN_ENTRIES);
76	b->map = calloc(no_uints, sizeof(uint32_t));
77	if (!b->map) {
78		free(b);
79		return NULL;
80	}
81
82	return b;
83}
84
85void bloom_free(struct bloom *b)
86{
87	free(b->map);
88	free(b);
89}
90
91static int __bloom_check(struct bloom *b, uint32_t *data, unsigned int nwords,
92			 int set)
93{
94	uint32_t hash[N_HASHES];
95	int i, was_set;
96
97	for (i = 0; i < N_HASHES; i++) {
98		hash[i] = hashes[i].fn(data, nwords, hashes[i].seed);
99		hash[i] = hash[i] % b->nentries;
100	}
101
102	was_set = 0;
103	for (i = 0; i < N_HASHES; i++) {
104		const unsigned int index = hash[i] / BITS_PER_INDEX;
105		const unsigned int bit = hash[i] & BITS_INDEX_MASK;
106
107		if (b->map[index] & (1U << bit))
108			was_set++;
109		if (set)
110			b->map[index] |= 1U << bit;
111	}
112
113	return was_set == N_HASHES;
114}
115
116int bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
117{
118	return __bloom_check(b, data, nwords, 1);
119}
120