1/*
2 * Copyright © 2012  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_PRIVATE_HH
28#define HB_SET_PRIVATE_HH
29
30#include "hb-private.hh"
31#include "hb-object-private.hh"
32
33
34/*
35 * The set digests here implement various "filters" that support
36 * "approximate member query".  Conceptually these are like Bloom
37 * Filter and Quotient Filter, however, much smaller, faster, and
38 * designed to fit the requirements of our uses for glyph coverage
39 * queries.  As a result, our filters have much higher.
40 */
41
42template <typename mask_t, unsigned int shift>
43struct hb_set_digest_lowest_bits_t
44{
45  ASSERT_POD ();
46
47  static const unsigned int mask_bytes = sizeof (mask_t);
48  static const unsigned int mask_bits = sizeof (mask_t) * 8;
49  static const unsigned int num_bits = 0
50				     + (mask_bytes >= 1 ? 3 : 0)
51				     + (mask_bytes >= 2 ? 1 : 0)
52				     + (mask_bytes >= 4 ? 1 : 0)
53				     + (mask_bytes >= 8 ? 1 : 0)
54				     + (mask_bytes >= 16? 1 : 0)
55				     + 0;
56
57  ASSERT_STATIC (shift < sizeof (hb_codepoint_t) * 8);
58  ASSERT_STATIC (shift + num_bits <= sizeof (hb_codepoint_t) * 8);
59
60  inline void init (void) {
61    mask = 0;
62  }
63
64  inline void add (hb_codepoint_t g) {
65    mask |= mask_for (g);
66  }
67
68  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
69    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
70      mask = (mask_t) -1;
71    else {
72      mask_t ma = mask_for (a);
73      mask_t mb = mask_for (b);
74      mask |= mb + (mb - ma) - (mb < ma);
75    }
76  }
77
78  inline bool may_have (hb_codepoint_t g) const {
79    return !!(mask & mask_for (g));
80  }
81
82  private:
83
84  static inline mask_t mask_for (hb_codepoint_t g) {
85    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
86  }
87  mask_t mask;
88};
89
90template <typename head_t, typename tail_t>
91struct hb_set_digest_combiner_t
92{
93  ASSERT_POD ();
94
95  inline void init (void) {
96    head.init ();
97    tail.init ();
98  }
99
100  inline void add (hb_codepoint_t g) {
101    head.add (g);
102    tail.add (g);
103  }
104
105  inline void add_range (hb_codepoint_t a, hb_codepoint_t b) {
106    head.add_range (a, b);
107    tail.add_range (a, b);
108  }
109
110  inline bool may_have (hb_codepoint_t g) const {
111    return head.may_have (g) && tail.may_have (g);
112  }
113
114  private:
115  head_t head;
116  tail_t tail;
117};
118
119
120/*
121 * hb_set_digest_t
122 *
123 * This is a combination of digests that performs "best".
124 * There is not much science to this: it's a result of intuition
125 * and testing.
126 */
127typedef hb_set_digest_combiner_t
128<
129  hb_set_digest_lowest_bits_t<unsigned long, 4>,
130  hb_set_digest_combiner_t
131  <
132    hb_set_digest_lowest_bits_t<unsigned long, 0>,
133    hb_set_digest_lowest_bits_t<unsigned long, 9>
134  >
135> hb_set_digest_t;
136
137
138
139/*
140 * hb_set_t
141 */
142
143
144/* TODO Make this faster and memmory efficient. */
145
146struct hb_set_t
147{
148  hb_object_header_t header;
149  ASSERT_POD ();
150  bool in_error;
151
152  inline void init (void) {
153    header.init ();
154    clear ();
155  }
156  inline void fini (void) {
157  }
158  inline void clear (void) {
159    if (unlikely (hb_object_is_inert (this)))
160      return;
161    in_error = false;
162    memset (elts, 0, sizeof elts);
163  }
164  inline bool is_empty (void) const {
165    for (unsigned int i = 0; i < ARRAY_LENGTH (elts); i++)
166      if (elts[i])
167        return false;
168    return true;
169  }
170  inline void add (hb_codepoint_t g)
171  {
172    if (unlikely (in_error)) return;
173    if (unlikely (g == INVALID)) return;
174    if (unlikely (g > MAX_G)) return;
175    elt (g) |= mask (g);
176  }
177  inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
178  {
179    if (unlikely (in_error)) return;
180    /* TODO Speedup */
181    for (unsigned int i = a; i < b + 1; i++)
182      add (i);
183  }
184  inline void del (hb_codepoint_t g)
185  {
186    if (unlikely (in_error)) return;
187    if (unlikely (g > MAX_G)) return;
188    elt (g) &= ~mask (g);
189  }
190  inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
191  {
192    if (unlikely (in_error)) return;
193    /* TODO Speedup */
194    for (unsigned int i = a; i < b + 1; i++)
195      del (i);
196  }
197  inline bool has (hb_codepoint_t g) const
198  {
199    if (unlikely (g > MAX_G)) return false;
200    return !!(elt (g) & mask (g));
201  }
202  inline bool intersects (hb_codepoint_t first,
203			  hb_codepoint_t last) const
204  {
205    if (unlikely (first > MAX_G)) return false;
206    if (unlikely (last  > MAX_G)) last = MAX_G;
207    unsigned int end = last + 1;
208    for (hb_codepoint_t i = first; i < end; i++)
209      if (has (i))
210        return true;
211    return false;
212  }
213  inline bool is_equal (const hb_set_t *other) const
214  {
215    for (unsigned int i = 0; i < ELTS; i++)
216      if (elts[i] != other->elts[i])
217        return false;
218    return true;
219  }
220  inline void set (const hb_set_t *other)
221  {
222    if (unlikely (in_error)) return;
223    for (unsigned int i = 0; i < ELTS; i++)
224      elts[i] = other->elts[i];
225  }
226  inline void union_ (const hb_set_t *other)
227  {
228    if (unlikely (in_error)) return;
229    for (unsigned int i = 0; i < ELTS; i++)
230      elts[i] |= other->elts[i];
231  }
232  inline void intersect (const hb_set_t *other)
233  {
234    if (unlikely (in_error)) return;
235    for (unsigned int i = 0; i < ELTS; i++)
236      elts[i] &= other->elts[i];
237  }
238  inline void subtract (const hb_set_t *other)
239  {
240    if (unlikely (in_error)) return;
241    for (unsigned int i = 0; i < ELTS; i++)
242      elts[i] &= ~other->elts[i];
243  }
244  inline void symmetric_difference (const hb_set_t *other)
245  {
246    if (unlikely (in_error)) return;
247    for (unsigned int i = 0; i < ELTS; i++)
248      elts[i] ^= other->elts[i];
249  }
250  inline void invert (void)
251  {
252    if (unlikely (in_error)) return;
253    for (unsigned int i = 0; i < ELTS; i++)
254      elts[i] = ~elts[i];
255  }
256  inline bool next (hb_codepoint_t *codepoint) const
257  {
258    if (unlikely (*codepoint == INVALID)) {
259      hb_codepoint_t i = get_min ();
260      if (i != INVALID) {
261        *codepoint = i;
262	return true;
263      } else {
264	*codepoint = INVALID;
265        return false;
266      }
267    }
268    for (hb_codepoint_t i = *codepoint + 1; i < MAX_G + 1; i++)
269      if (has (i)) {
270        *codepoint = i;
271	return true;
272      }
273    *codepoint = INVALID;
274    return false;
275  }
276  inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
277  {
278    hb_codepoint_t i;
279
280    i = *last;
281    if (!next (&i))
282    {
283      *last = *first = INVALID;
284      return false;
285    }
286
287    *last = *first = i;
288    while (next (&i) && i == *last + 1)
289      (*last)++;
290
291    return true;
292  }
293
294  inline unsigned int get_population (void) const
295  {
296    unsigned int count = 0;
297    for (unsigned int i = 0; i < ELTS; i++)
298      count += _hb_popcount32 (elts[i]);
299    return count;
300  }
301  inline hb_codepoint_t get_min (void) const
302  {
303    for (unsigned int i = 0; i < ELTS; i++)
304      if (elts[i])
305	for (unsigned int j = 0; j < BITS; j++)
306	  if (elts[i] & (1 << j))
307	    return i * BITS + j;
308    return INVALID;
309  }
310  inline hb_codepoint_t get_max (void) const
311  {
312    for (unsigned int i = ELTS; i; i--)
313      if (elts[i - 1])
314	for (unsigned int j = BITS; j; j--)
315	  if (elts[i - 1] & (1 << (j - 1)))
316	    return (i - 1) * BITS + (j - 1);
317    return INVALID;
318  }
319
320  typedef uint32_t elt_t;
321  static const unsigned int MAX_G = 65536 - 1; /* XXX Fix this... */
322  static const unsigned int SHIFT = 5;
323  static const unsigned int BITS = (1 << SHIFT);
324  static const unsigned int MASK = BITS - 1;
325  static const unsigned int ELTS = (MAX_G + 1 + (BITS - 1)) / BITS;
326  static  const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
327
328  elt_t &elt (hb_codepoint_t g) { return elts[g >> SHIFT]; }
329  elt_t elt (hb_codepoint_t g) const { return elts[g >> SHIFT]; }
330  elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & MASK); }
331
332  elt_t elts[ELTS]; /* XXX 8kb */
333
334  ASSERT_STATIC (sizeof (elt_t) * 8 == BITS);
335  ASSERT_STATIC (sizeof (elt_t) * 8 * ELTS > MAX_G);
336};
337
338
339
340#endif /* HB_SET_PRIVATE_HH */
341