1/* NOLINT(build/header_guard) */
2/* Copyright 2010 Google Inc. All Rights Reserved.
3
4   Distributed under MIT license.
5   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN */
9
10/* A (forgetful) hash table to the data seen by the compressor, to
11   help create backward references to previous data.
12
13   This is a hash map of fixed size (bucket_size_) to a ring buffer of
14   fixed size (block_size_). The ring buffer contains the last block_size_
15   index positions of the given hash key in the compressed data. */
16
17#define HashLongestMatch HASHER()
18
19static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
20static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
21
22/* HashBytes is the function that chooses the bucket to place the address in. */
23static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) {
24  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
25  /* The higher bits contain more mixture from the multiplication,
26     so we take our results from there. */
27  return (uint32_t)(h >> shift);
28}
29
30typedef struct HashLongestMatch {
31  /* Number of hash buckets. */
32  size_t bucket_size_;
33  /* Only block_size_ newest backward references are kept,
34     and the older are forgotten. */
35  size_t block_size_;
36  /* Left-shift for computing hash bucket index from hash value. */
37  int hash_shift_;
38  /* Mask for accessing entries in a block (in a ring-buffer manner). */
39  uint32_t block_mask_;
40
41  /* --- Dynamic size members --- */
42
43  /* Number of entries in a particular bucket. */
44  /* uint16_t num[bucket_size]; */
45
46  /* Buckets containing block_size_ of backward references. */
47  /* uint32_t* buckets[bucket_size * block_size]; */
48} HashLongestMatch;
49
50static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
51  return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
52}
53
54static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
55  return (uint16_t*)(&self[1]);
56}
57
58static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
59  return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
60}
61
62static void FN(Initialize)(
63    HasherHandle handle, const BrotliEncoderParams* params) {
64  HasherCommon* common = GetHasherCommon(handle);
65  HashLongestMatch* self = FN(Self)(handle);
66  BROTLI_UNUSED(params);
67  self->hash_shift_ = 32 - common->params.bucket_bits;
68  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
69  self->block_size_ = (size_t)1 << common->params.block_bits;
70  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
71}
72
73static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
74    size_t input_size, const uint8_t* data) {
75  HashLongestMatch* self = FN(Self)(handle);
76  uint16_t* num = FN(Num)(self);
77  /* Partial preparation is 100 times slower (per socket). */
78  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
79  if (one_shot && input_size <= partial_prepare_threshold) {
80    size_t i;
81    for (i = 0; i < input_size; ++i) {
82      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
83      num[key] = 0;
84    }
85  } else {
86    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
87  }
88}
89
90static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
91    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
92    size_t input_size) {
93  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
94  size_t block_size = (size_t)1 << params->hasher.block_bits;
95  BROTLI_UNUSED(one_shot);
96  BROTLI_UNUSED(input_size);
97  return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
98}
99
100/* Look at 4 bytes at &data[ix & mask].
101   Compute a hash from these, and store the value of ix at that position. */
102static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
103    const size_t mask, const size_t ix) {
104  HashLongestMatch* self = FN(Self)(handle);
105  uint16_t* num = FN(Num)(self);
106  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
107  const size_t minor_ix = num[key] & self->block_mask_;
108  const size_t offset =
109      minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
110  FN(Buckets)(self)[offset] = (uint32_t)ix;
111  ++num[key];
112}
113
114static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
115    const uint8_t *data, const size_t mask, const size_t ix_start,
116    const size_t ix_end) {
117  size_t i;
118  for (i = ix_start; i < ix_end; ++i) {
119    FN(Store)(handle, data, mask, i);
120  }
121}
122
123static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
124    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
125    size_t ringbuffer_mask) {
126  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
127    /* Prepare the hashes for three last bytes of the last write.
128       These could not be calculated before, since they require knowledge
129       of both the previous and the current block. */
130    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
131    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
132    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
133  }
134}
135
136static BROTLI_INLINE void FN(PrepareDistanceCache)(
137    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
138  PrepareDistanceCache(distance_cache,
139      GetHasherCommon(handle)->params.num_last_distances_to_check);
140}
141
142/* Find a longest backward match of &data[cur_ix] up to the length of
143   max_length and stores the position cur_ix in the hash table.
144
145   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
146             values; if this method is invoked repeatedly with the same distance
147             cache values, it is enough to invoke FN(PrepareDistanceCache) once.
148
149   Does not look for matches longer than max_length.
150   Does not look for matches further away than max_backward.
151   Writes the best match into |out|.
152   Returns true when match is found, otherwise false. */
153static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
154    const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
155    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
156    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
157    const size_t max_length, const size_t max_backward,
158    HasherSearchResult* BROTLI_RESTRICT out) {
159  HasherCommon* common = GetHasherCommon(handle);
160  HashLongestMatch* self = FN(Self)(handle);
161  uint16_t* num = FN(Num)(self);
162  uint32_t* buckets = FN(Buckets)(self);
163  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
164  BROTLI_BOOL is_match_found = BROTLI_FALSE;
165  /* Don't accept a short copy from far away. */
166  score_t best_score = out->score;
167  size_t best_len = out->len;
168  size_t i;
169  out->len = 0;
170  out->len_x_code = 0;
171  /* Try last distance first. */
172  for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
173    const size_t backward = (size_t)distance_cache[i];
174    size_t prev_ix = (size_t)(cur_ix - backward);
175    if (prev_ix >= cur_ix) {
176      continue;
177    }
178    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
179      continue;
180    }
181    prev_ix &= ring_buffer_mask;
182
183    if (cur_ix_masked + best_len > ring_buffer_mask ||
184        prev_ix + best_len > ring_buffer_mask ||
185        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
186      continue;
187    }
188    {
189      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
190                                                  &data[cur_ix_masked],
191                                                  max_length);
192      if (len >= 3 || (len == 2 && i < 2)) {
193        /* Comparing for >= 2 does not change the semantics, but just saves for
194           a few unnecessary binary logarithms in backward reference score,
195           since we are not interested in such short matches. */
196        score_t score = BackwardReferenceScoreUsingLastDistance(len);
197        if (best_score < score) {
198          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
199          if (best_score < score) {
200            best_score = score;
201            best_len = len;
202            out->len = best_len;
203            out->distance = backward;
204            out->score = best_score;
205            is_match_found = BROTLI_TRUE;
206          }
207        }
208      }
209    }
210  }
211  {
212    const uint32_t key =
213        FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
214    uint32_t* BROTLI_RESTRICT bucket =
215        &buckets[key << common->params.block_bits];
216    const size_t down =
217        (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
218    for (i = num[key]; i > down;) {
219      size_t prev_ix = bucket[--i & self->block_mask_];
220      const size_t backward = cur_ix - prev_ix;
221      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
222        break;
223      }
224      prev_ix &= ring_buffer_mask;
225      if (cur_ix_masked + best_len > ring_buffer_mask ||
226          prev_ix + best_len > ring_buffer_mask ||
227          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
228        continue;
229      }
230      {
231        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
232                                                    &data[cur_ix_masked],
233                                                    max_length);
234        if (len >= 4) {
235          /* Comparing for >= 3 does not change the semantics, but just saves
236             for a few unnecessary binary logarithms in backward reference
237             score, since we are not interested in such short matches. */
238          score_t score = BackwardReferenceScore(len, backward);
239          if (best_score < score) {
240            best_score = score;
241            best_len = len;
242            out->len = best_len;
243            out->distance = backward;
244            out->score = best_score;
245            is_match_found = BROTLI_TRUE;
246          }
247        }
248      }
249    }
250    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
251    ++num[key];
252  }
253  if (!is_match_found) {
254    is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
255        handle, &data[cur_ix_masked], max_length, max_backward, out,
256        BROTLI_FALSE);
257  }
258  return is_match_found;
259}
260
261#undef HashLongestMatch
262