1/*
2 * Copyright © 2014  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type-private.hh"
31
32
33namespace OT {
34
35
36/*
37 * cmap -- Character To Glyph Index Mapping Table
38 */
39
40#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41
42
43struct CmapSubtableFormat0
44{
45  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46  {
47    hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48    if (!gid)
49      return false;
50    *glyph = gid;
51    return true;
52  }
53
54  inline bool sanitize (hb_sanitize_context_t *c) const
55  {
56    TRACE_SANITIZE (this);
57    return_trace (c->check_struct (this));
58  }
59
60  protected:
61  USHORT	format;		/* Format number is set to 0. */
62  USHORT	lengthZ;	/* Byte length of this subtable. */
63  USHORT	languageZ;	/* Ignore. */
64  BYTE		glyphIdArray[256];/* An array that maps character
65				 * code to glyph index values. */
66  public:
67  DEFINE_SIZE_STATIC (6 + 256);
68};
69
70struct CmapSubtableFormat4
71{
72  struct accelerator_t
73  {
74    inline void init (const CmapSubtableFormat4 *subtable)
75    {
76      segCount = subtable->segCountX2 / 2;
77      endCount = subtable->values;
78      startCount = endCount + segCount + 1;
79      idDelta = startCount + segCount;
80      idRangeOffset = idDelta + segCount;
81      glyphIdArray = idRangeOffset + segCount;
82      glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
83    }
84
85    static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
86    {
87      const accelerator_t *thiz = (const accelerator_t *) obj;
88
89      /* Custom two-array bsearch. */
90      int min = 0, max = (int) thiz->segCount - 1;
91      const USHORT *startCount = thiz->startCount;
92      const USHORT *endCount = thiz->endCount;
93      unsigned int i;
94      while (min <= max)
95      {
96	int mid = (min + max) / 2;
97	if (codepoint < startCount[mid])
98	  max = mid - 1;
99	else if (codepoint > endCount[mid])
100	  min = mid + 1;
101	else
102	{
103	  i = mid;
104	  goto found;
105	}
106      }
107      return false;
108
109    found:
110      hb_codepoint_t gid;
111      unsigned int rangeOffset = thiz->idRangeOffset[i];
112      if (rangeOffset == 0)
113	gid = codepoint + thiz->idDelta[i];
114      else
115      {
116	/* Somebody has been smoking... */
117	unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
118	if (unlikely (index >= thiz->glyphIdArrayLength))
119	  return false;
120	gid = thiz->glyphIdArray[index];
121	if (unlikely (!gid))
122	  return false;
123	gid += thiz->idDelta[i];
124      }
125
126      *glyph = gid & 0xFFFFu;
127      return true;
128    }
129
130    const USHORT *endCount;
131    const USHORT *startCount;
132    const USHORT *idDelta;
133    const USHORT *idRangeOffset;
134    const USHORT *glyphIdArray;
135    unsigned int segCount;
136    unsigned int glyphIdArrayLength;
137  };
138
139  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
140  {
141    accelerator_t accel;
142    accel.init (this);
143    return accel.get_glyph_func (&accel, codepoint, glyph);
144  }
145
146  inline bool sanitize (hb_sanitize_context_t *c) const
147  {
148    TRACE_SANITIZE (this);
149    if (unlikely (!c->check_struct (this)))
150      return_trace (false);
151
152    if (unlikely (!c->check_range (this, length)))
153    {
154      /* Some broken fonts have too long of a "length" value.
155       * If that is the case, just change the value to truncate
156       * the subtable at the end of the blob. */
157      uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
158					    (uintptr_t) (c->end -
159							 (char *) this));
160      if (!c->try_set (&length, new_length))
161	return_trace (false);
162    }
163
164    return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
165  }
166
167  protected:
168  USHORT	format;		/* Format number is set to 4. */
169  USHORT	length;		/* This is the length in bytes of the
170				 * subtable. */
171  USHORT	languageZ;	/* Ignore. */
172  USHORT	segCountX2;	/* 2 x segCount. */
173  USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
174  USHORT	entrySelectorZ;	/* log2(searchRange/2) */
175  USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
176
177  USHORT	values[VAR];
178#if 0
179  USHORT	endCount[segCount];	/* End characterCode for each segment,
180					 * last=0xFFFFu. */
181  USHORT	reservedPad;		/* Set to 0. */
182  USHORT	startCount[segCount];	/* Start character code for each segment. */
183  SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
184  USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
185  USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
186#endif
187
188  public:
189  DEFINE_SIZE_ARRAY (14, values);
190};
191
192struct CmapSubtableLongGroup
193{
194  friend struct CmapSubtableFormat12;
195  friend struct CmapSubtableFormat13;
196
197  int cmp (hb_codepoint_t codepoint) const
198  {
199    if (codepoint < startCharCode) return -1;
200    if (codepoint > endCharCode)   return +1;
201    return 0;
202  }
203
204  inline bool sanitize (hb_sanitize_context_t *c) const
205  {
206    TRACE_SANITIZE (this);
207    return_trace (c->check_struct (this));
208  }
209
210  private:
211  ULONG		startCharCode;	/* First character code in this group. */
212  ULONG		endCharCode;	/* Last character code in this group. */
213  ULONG		glyphID;	/* Glyph index; interpretation depends on
214				 * subtable format. */
215  public:
216  DEFINE_SIZE_STATIC (12);
217};
218
219template <typename UINT>
220struct CmapSubtableTrimmed
221{
222  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
223  {
224    /* Rely on our implicit array bound-checking. */
225    hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
226    if (!gid)
227      return false;
228    *glyph = gid;
229    return true;
230  }
231
232  inline bool sanitize (hb_sanitize_context_t *c) const
233  {
234    TRACE_SANITIZE (this);
235    return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
236  }
237
238  protected:
239  UINT		formatReserved;	/* Subtable format and (maybe) padding. */
240  UINT		lengthZ;	/* Byte length of this subtable. */
241  UINT		languageZ;	/* Ignore. */
242  UINT		startCharCode;	/* First character code covered. */
243  ArrayOf<GlyphID, UINT>
244		glyphIdArray;	/* Array of glyph index values for character
245				 * codes in the range. */
246  public:
247  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
248};
249
250struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
251struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
252
253template <typename T>
254struct CmapSubtableLongSegmented
255{
256  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
257  {
258    int i = groups.bsearch (codepoint);
259    if (i == -1)
260      return false;
261    *glyph = T::group_get_glyph (groups[i], codepoint);
262    return true;
263  }
264
265  inline bool sanitize (hb_sanitize_context_t *c) const
266  {
267    TRACE_SANITIZE (this);
268    return_trace (c->check_struct (this) && groups.sanitize (c));
269  }
270
271  protected:
272  USHORT	format;		/* Subtable format; set to 12. */
273  USHORT	reservedZ;	/* Reserved; set to 0. */
274  ULONG		lengthZ;	/* Byte length of this subtable. */
275  ULONG		languageZ;	/* Ignore. */
276  SortedArrayOf<CmapSubtableLongGroup, ULONG>
277		groups;		/* Groupings. */
278  public:
279  DEFINE_SIZE_ARRAY (16, groups);
280};
281
282struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
283{
284  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
285						hb_codepoint_t u)
286  { return group.glyphID + (u - group.startCharCode); }
287};
288
289struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
290{
291  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
292						hb_codepoint_t u HB_UNUSED)
293  { return group.glyphID; }
294};
295
/* Outcome of a variation-sequence lookup
 * (see VariationSelectorRecord::get_glyph). */
typedef enum
{
  /* The sequence is in neither UVS table. */
  GLYPH_VARIANT_NOT_FOUND   = 0,
  /* The sequence is in the non-default UVS table; a glyph was stored. */
  GLYPH_VARIANT_FOUND       = 1,
  /* The sequence is in the default UVS table; no glyph was stored. */
  GLYPH_VARIANT_USE_DEFAULT = 2
} glyph_variant_t;
302
303struct UnicodeValueRange
304{
305  inline int cmp (const hb_codepoint_t &codepoint) const
306  {
307    if (codepoint < startUnicodeValue) return -1;
308    if (codepoint > startUnicodeValue + additionalCount) return +1;
309    return 0;
310  }
311
312  inline bool sanitize (hb_sanitize_context_t *c) const
313  {
314    TRACE_SANITIZE (this);
315    return_trace (c->check_struct (this));
316  }
317
318  UINT24	startUnicodeValue;	/* First value in this range. */
319  BYTE		additionalCount;	/* Number of additional values in this
320					 * range. */
321  public:
322  DEFINE_SIZE_STATIC (4);
323};
324
325typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
326
327struct UVSMapping
328{
329  inline int cmp (const hb_codepoint_t &codepoint) const
330  {
331    return unicodeValue.cmp (codepoint);
332  }
333
334  inline bool sanitize (hb_sanitize_context_t *c) const
335  {
336    TRACE_SANITIZE (this);
337    return_trace (c->check_struct (this));
338  }
339
340  UINT24	unicodeValue;	/* Base Unicode value of the UVS */
341  GlyphID	glyphID;	/* Glyph ID of the UVS */
342  public:
343  DEFINE_SIZE_STATIC (5);
344};
345
346typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
347
348struct VariationSelectorRecord
349{
350  inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
351				    hb_codepoint_t *glyph,
352				    const void *base) const
353  {
354    int i;
355    const DefaultUVS &defaults = base+defaultUVS;
356    i = defaults.bsearch (codepoint);
357    if (i != -1)
358      return GLYPH_VARIANT_USE_DEFAULT;
359    const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
360    i = nonDefaults.bsearch (codepoint);
361    if (i != -1)
362    {
363      *glyph = nonDefaults[i].glyphID;
364       return GLYPH_VARIANT_FOUND;
365    }
366    return GLYPH_VARIANT_NOT_FOUND;
367  }
368
369  inline int cmp (const hb_codepoint_t &variation_selector) const
370  {
371    return varSelector.cmp (variation_selector);
372  }
373
374  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
375  {
376    TRACE_SANITIZE (this);
377    return_trace (c->check_struct (this) &&
378		  defaultUVS.sanitize (c, base) &&
379		  nonDefaultUVS.sanitize (c, base));
380  }
381
382  UINT24	varSelector;	/* Variation selector. */
383  OffsetTo<DefaultUVS, ULONG>
384		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
385  OffsetTo<NonDefaultUVS, ULONG>
386		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
387  public:
388  DEFINE_SIZE_STATIC (11);
389};
390
391struct CmapSubtableFormat14
392{
393  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
394					    hb_codepoint_t variation_selector,
395					    hb_codepoint_t *glyph) const
396  {
397    return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
398  }
399
400  inline bool sanitize (hb_sanitize_context_t *c) const
401  {
402    TRACE_SANITIZE (this);
403    return_trace (c->check_struct (this) &&
404		  record.sanitize (c, this));
405  }
406
407  protected:
408  USHORT	format;		/* Format number is set to 14. */
409  ULONG		lengthZ;	/* Byte length of this subtable. */
410  SortedArrayOf<VariationSelectorRecord, ULONG>
411		record;		/* Variation selector records; sorted
412				 * in increasing order of `varSelector'. */
413  public:
414  DEFINE_SIZE_ARRAY (10, record);
415};
416
417struct CmapSubtable
418{
419  /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
420
421  inline bool get_glyph (hb_codepoint_t codepoint,
422			 hb_codepoint_t *glyph) const
423  {
424    switch (u.format) {
425    case  0: return u.format0 .get_glyph(codepoint, glyph);
426    case  4: return u.format4 .get_glyph(codepoint, glyph);
427    case  6: return u.format6 .get_glyph(codepoint, glyph);
428    case 10: return u.format10.get_glyph(codepoint, glyph);
429    case 12: return u.format12.get_glyph(codepoint, glyph);
430    case 13: return u.format13.get_glyph(codepoint, glyph);
431    case 14:
432    default: return false;
433    }
434  }
435
436  inline bool sanitize (hb_sanitize_context_t *c) const
437  {
438    TRACE_SANITIZE (this);
439    if (!u.format.sanitize (c)) return_trace (false);
440    switch (u.format) {
441    case  0: return_trace (u.format0 .sanitize (c));
442    case  4: return_trace (u.format4 .sanitize (c));
443    case  6: return_trace (u.format6 .sanitize (c));
444    case 10: return_trace (u.format10.sanitize (c));
445    case 12: return_trace (u.format12.sanitize (c));
446    case 13: return_trace (u.format13.sanitize (c));
447    case 14: return_trace (u.format14.sanitize (c));
448    default:return_trace (true);
449    }
450  }
451
452  public:
453  union {
454  USHORT		format;		/* Format identifier */
455  CmapSubtableFormat0	format0;
456  CmapSubtableFormat4	format4;
457  CmapSubtableFormat6	format6;
458  CmapSubtableFormat10	format10;
459  CmapSubtableFormat12	format12;
460  CmapSubtableFormat13	format13;
461  CmapSubtableFormat14	format14;
462  } u;
463  public:
464  DEFINE_SIZE_UNION (2, format);
465};
466
467
468struct EncodingRecord
469{
470  inline int cmp (const EncodingRecord &other) const
471  {
472    int ret;
473    ret = platformID.cmp (other.platformID);
474    if (ret) return ret;
475    ret = encodingID.cmp (other.encodingID);
476    if (ret) return ret;
477    return 0;
478  }
479
480  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
481  {
482    TRACE_SANITIZE (this);
483    return_trace (c->check_struct (this) &&
484		  subtable.sanitize (c, base));
485  }
486
487  USHORT	platformID;	/* Platform ID. */
488  USHORT	encodingID;	/* Platform-specific encoding ID. */
489  OffsetTo<CmapSubtable, ULONG>
490		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
491  public:
492  DEFINE_SIZE_STATIC (8);
493};
494
495struct cmap
496{
497  static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
498
499  inline const CmapSubtable *find_subtable (unsigned int platform_id,
500					    unsigned int encoding_id) const
501  {
502    EncodingRecord key;
503    key.platformID.set (platform_id);
504    key.encodingID.set (encoding_id);
505
506    /* Note: We can use bsearch, but since it has no performance
507     * implications, we use lsearch and as such accept fonts with
508     * unsorted subtable list. */
509    int result = encodingRecord./*bsearch*/lsearch (key);
510    if (result == -1 || !encodingRecord[result].subtable)
511      return NULL;
512
513    return &(this+encodingRecord[result].subtable);
514  }
515
516  inline bool sanitize (hb_sanitize_context_t *c) const
517  {
518    TRACE_SANITIZE (this);
519    return_trace (c->check_struct (this) &&
520		  likely (version == 0) &&
521		  encodingRecord.sanitize (c, this));
522  }
523
524  USHORT		version;	/* Table version number (0). */
525  SortedArrayOf<EncodingRecord>
526			encodingRecord;	/* Encoding tables. */
527  public:
528  DEFINE_SIZE_ARRAY (4, encodingRecord);
529};
530
531
532} /* namespace OT */
533
534
535#endif /* HB_OT_CMAP_TABLE_HH */
536