1/*
2 * Copyright © 2014  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type-private.hh"
31
32
33namespace OT {
34
35
36/*
37 * cmap -- Character To Glyph Index Mapping Table
38 */
39
40#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41
42
43struct CmapSubtableFormat0
44{
45  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46  {
47    hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48    if (!gid)
49      return false;
50    *glyph = gid;
51    return true;
52  }
53
54  inline bool sanitize (hb_sanitize_context_t *c) {
55    TRACE_SANITIZE (this);
56    return TRACE_RETURN (c->check_struct (this));
57  }
58
59  protected:
60  USHORT	format;		/* Format number is set to 0. */
61  USHORT	lengthZ;	/* Byte length of this subtable. */
62  USHORT	languageZ;	/* Ignore. */
63  BYTE		glyphIdArray[256];/* An array that maps character
64				 * code to glyph index values. */
65  public:
66  DEFINE_SIZE_STATIC (6 + 256);
67};
68
69struct CmapSubtableFormat4
70{
71  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
72  {
73    unsigned int segCount;
74    const USHORT *endCount;
75    const USHORT *startCount;
76    const USHORT *idDelta;
77    const USHORT *idRangeOffset;
78    const USHORT *glyphIdArray;
79    unsigned int glyphIdArrayLength;
80
81    segCount = this->segCountX2 / 2;
82    endCount = this->values;
83    startCount = endCount + segCount + 1;
84    idDelta = startCount + segCount;
85    idRangeOffset = idDelta + segCount;
86    glyphIdArray = idRangeOffset + segCount;
87    glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
88
89    /* Custom two-array bsearch. */
90    int min = 0, max = (int) segCount - 1;
91    unsigned int i;
92    while (min <= max)
93    {
94      int mid = (min + max) / 2;
95      if (codepoint < startCount[mid])
96        max = mid - 1;
97      else if (codepoint > endCount[mid])
98        min = mid + 1;
99      else
100      {
101	i = mid;
102	goto found;
103      }
104    }
105    return false;
106
107  found:
108    hb_codepoint_t gid;
109    unsigned int rangeOffset = idRangeOffset[i];
110    if (rangeOffset == 0)
111      gid = codepoint + idDelta[i];
112    else
113    {
114      /* Somebody has been smoking... */
115      unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
116      if (unlikely (index >= glyphIdArrayLength))
117	return false;
118      gid = glyphIdArray[index];
119      if (unlikely (!gid))
120	return false;
121      gid += idDelta[i];
122    }
123
124    *glyph = gid & 0xFFFFu;
125    return true;
126  }
127
128  inline bool sanitize (hb_sanitize_context_t *c)
129  {
130    TRACE_SANITIZE (this);
131    if (unlikely (!c->check_struct (this)))
132      return TRACE_RETURN (false);
133
134    if (unlikely (!c->check_range (this, length)))
135    {
136      /* Some broken fonts have too long of a "length" value.
137       * If that is the case, just change the value to truncate
138       * the subtable at the end of the blob. */
139      uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
140					    (uintptr_t) (c->end -
141							 (char *) this));
142      if (!c->try_set (&length, new_length))
143	return TRACE_RETURN (false);
144    }
145
146    return TRACE_RETURN (16 + 4 * (unsigned int) segCountX2 <= length);
147  }
148
149  protected:
150  USHORT	format;		/* Format number is set to 4. */
151  USHORT	length;		/* This is the length in bytes of the
152				 * subtable. */
153  USHORT	languageZ;	/* Ignore. */
154  USHORT	segCountX2;	/* 2 x segCount. */
155  USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
156  USHORT	entrySelectorZ;	/* log2(searchRange/2) */
157  USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
158
159  USHORT	values[VAR];
160#if 0
161  USHORT	endCount[segCount];	/* End characterCode for each segment,
162					 * last=0xFFFFu. */
163  USHORT	reservedPad;		/* Set to 0. */
164  USHORT	startCount[segCount];	/* Start character code for each segment. */
165  SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
166  USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
167  USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
168#endif
169
170  public:
171  DEFINE_SIZE_ARRAY (14, values);
172};
173
174struct CmapSubtableLongGroup
175{
176  friend struct CmapSubtableFormat12;
177  friend struct CmapSubtableFormat13;
178
179  int cmp (hb_codepoint_t codepoint) const
180  {
181    if (codepoint < startCharCode) return -1;
182    if (codepoint > endCharCode)   return +1;
183    return 0;
184  }
185
186  inline bool sanitize (hb_sanitize_context_t *c) {
187    TRACE_SANITIZE (this);
188    return TRACE_RETURN (c->check_struct (this));
189  }
190
191  private:
192  ULONG		startCharCode;	/* First character code in this group. */
193  ULONG		endCharCode;	/* Last character code in this group. */
194  ULONG		glyphID;	/* Glyph index; interpretation depends on
195				 * subtable format. */
196  public:
197  DEFINE_SIZE_STATIC (12);
198};
199
200template <typename UINT>
201struct CmapSubtableTrimmed
202{
203  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
204  {
205    /* Rely on our implicit array bound-checking. */
206    hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
207    if (!gid)
208      return false;
209    *glyph = gid;
210    return true;
211  }
212
213  inline bool sanitize (hb_sanitize_context_t *c) {
214    TRACE_SANITIZE (this);
215    return TRACE_RETURN (c->check_struct (this) && glyphIdArray.sanitize (c));
216  }
217
218  protected:
219  UINT		formatReserved;	/* Subtable format and (maybe) padding. */
220  UINT		lengthZ;	/* Byte length of this subtable. */
221  UINT		languageZ;	/* Ignore. */
222  UINT		startCharCode;	/* First character code covered. */
223  ArrayOf<GlyphID, UINT>
224		glyphIdArray;	/* Array of glyph index values for character
225				 * codes in the range. */
226  public:
227  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
228};
229
230struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
231struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
232
233template <typename T>
234struct CmapSubtableLongSegmented
235{
236  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
237  {
238    int i = groups.bsearch (codepoint);
239    if (i == -1)
240      return false;
241    *glyph = T::group_get_glyph (groups[i], codepoint);
242    return true;
243  }
244
245  inline bool sanitize (hb_sanitize_context_t *c) {
246    TRACE_SANITIZE (this);
247    return TRACE_RETURN (c->check_struct (this) && groups.sanitize (c));
248  }
249
250  protected:
251  USHORT	format;		/* Subtable format; set to 12. */
252  USHORT	reservedZ;	/* Reserved; set to 0. */
253  ULONG		lengthZ;	/* Byte length of this subtable. */
254  ULONG		languageZ;	/* Ignore. */
255  SortedArrayOf<CmapSubtableLongGroup, ULONG>
256		groups;		/* Groupings. */
257  public:
258  DEFINE_SIZE_ARRAY (16, groups);
259};
260
261struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
262{
263  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
264						hb_codepoint_t u)
265  { return group.glyphID + (u - group.startCharCode); }
266};
267
268struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
269{
270  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
271						hb_codepoint_t u HB_UNUSED)
272  { return group.glyphID; }
273};
274
/* Outcome of a variation-sequence lookup. */
typedef enum
{
  /* No mapping exists for the (codepoint, selector) pair. */
  GLYPH_VARIANT_NOT_FOUND	= 0,
  /* A specific glyph was found and written to the output. */
  GLYPH_VARIANT_FOUND		= 1,
  /* The pair is listed in the Default UVS table; no glyph is written. */
  GLYPH_VARIANT_USE_DEFAULT	= 2
} glyph_variant_t;
281
282struct UnicodeValueRange
283{
284  inline int cmp (const hb_codepoint_t &codepoint) const
285  {
286    if (codepoint < startUnicodeValue) return -1;
287    if (codepoint > startUnicodeValue + additionalCount) return +1;
288    return 0;
289  }
290
291  inline bool sanitize (hb_sanitize_context_t *c) {
292    TRACE_SANITIZE (this);
293    return TRACE_RETURN (c->check_struct (this));
294  }
295
296  UINT24	startUnicodeValue;	/* First value in this range. */
297  BYTE		additionalCount;	/* Number of additional values in this
298					 * range. */
299  public:
300  DEFINE_SIZE_STATIC (4);
301};
302
303typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
304
305struct UVSMapping
306{
307  inline int cmp (const hb_codepoint_t &codepoint) const
308  {
309    return unicodeValue.cmp (codepoint);
310  }
311
312  inline bool sanitize (hb_sanitize_context_t *c) {
313    TRACE_SANITIZE (this);
314    return TRACE_RETURN (c->check_struct (this));
315  }
316
317  UINT24	unicodeValue;	/* Base Unicode value of the UVS */
318  GlyphID	glyphID;	/* Glyph ID of the UVS */
319  public:
320  DEFINE_SIZE_STATIC (5);
321};
322
323typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
324
325struct VariationSelectorRecord
326{
327  inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
328				    hb_codepoint_t *glyph,
329				    const void *base) const
330  {
331    int i;
332    const DefaultUVS &defaults = base+defaultUVS;
333    i = defaults.bsearch (codepoint);
334    if (i != -1)
335      return GLYPH_VARIANT_USE_DEFAULT;
336    const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
337    i = nonDefaults.bsearch (codepoint);
338    if (i != -1)
339    {
340      *glyph = nonDefaults[i].glyphID;
341       return GLYPH_VARIANT_FOUND;
342    }
343    return GLYPH_VARIANT_NOT_FOUND;
344  }
345
346  inline int cmp (const hb_codepoint_t &variation_selector) const
347  {
348    return varSelector.cmp (variation_selector);
349  }
350
351  inline bool sanitize (hb_sanitize_context_t *c, void *base) {
352    TRACE_SANITIZE (this);
353    return TRACE_RETURN (c->check_struct (this) &&
354			 defaultUVS.sanitize (c, base) &&
355			 nonDefaultUVS.sanitize (c, base));
356  }
357
358  UINT24	varSelector;	/* Variation selector. */
359  OffsetTo<DefaultUVS, ULONG>
360		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
361  OffsetTo<NonDefaultUVS, ULONG>
362		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
363  public:
364  DEFINE_SIZE_STATIC (11);
365};
366
367struct CmapSubtableFormat14
368{
369  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
370					    hb_codepoint_t variation_selector,
371					    hb_codepoint_t *glyph) const
372  {
373    return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
374  }
375
376  inline bool sanitize (hb_sanitize_context_t *c) {
377    TRACE_SANITIZE (this);
378    return TRACE_RETURN (c->check_struct (this) &&
379			 record.sanitize (c, this));
380  }
381
382  protected:
383  USHORT	format;		/* Format number is set to 0. */
384  ULONG		lengthZ;	/* Byte length of this subtable. */
385  SortedArrayOf<VariationSelectorRecord, ULONG>
386		record;		/* Variation selector records; sorted
387				 * in increasing order of `varSelector'. */
388  public:
389  DEFINE_SIZE_ARRAY (10, record);
390};
391
392struct CmapSubtable
393{
394  /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
395
396  inline bool get_glyph (hb_codepoint_t codepoint,
397			 hb_codepoint_t *glyph) const
398  {
399    switch (u.format) {
400    case  0: return u.format0 .get_glyph(codepoint, glyph);
401    case  4: return u.format4 .get_glyph(codepoint, glyph);
402    case  6: return u.format6 .get_glyph(codepoint, glyph);
403    case 10: return u.format10.get_glyph(codepoint, glyph);
404    case 12: return u.format12.get_glyph(codepoint, glyph);
405    case 13: return u.format13.get_glyph(codepoint, glyph);
406    case 14:
407    default: return false;
408    }
409  }
410
411  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
412					    hb_codepoint_t variation_selector,
413					    hb_codepoint_t *glyph) const
414  {
415    switch (u.format) {
416    case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
417    default: return GLYPH_VARIANT_NOT_FOUND;
418    }
419  }
420
421  inline bool sanitize (hb_sanitize_context_t *c) {
422    TRACE_SANITIZE (this);
423    if (!u.format.sanitize (c)) return TRACE_RETURN (false);
424    switch (u.format) {
425    case  0: return TRACE_RETURN (u.format0 .sanitize (c));
426    case  4: return TRACE_RETURN (u.format4 .sanitize (c));
427    case  6: return TRACE_RETURN (u.format6 .sanitize (c));
428    case 10: return TRACE_RETURN (u.format10.sanitize (c));
429    case 12: return TRACE_RETURN (u.format12.sanitize (c));
430    case 13: return TRACE_RETURN (u.format13.sanitize (c));
431    case 14: return TRACE_RETURN (u.format14.sanitize (c));
432    default:return TRACE_RETURN (true);
433    }
434  }
435
436  protected:
437  union {
438  USHORT		format;		/* Format identifier */
439  CmapSubtableFormat0	format0;
440  CmapSubtableFormat4	format4;
441  CmapSubtableFormat6	format6;
442  CmapSubtableFormat10	format10;
443  CmapSubtableFormat12	format12;
444  CmapSubtableFormat13	format13;
445  CmapSubtableFormat14	format14;
446  } u;
447  public:
448  DEFINE_SIZE_UNION (2, format);
449};
450
451
452struct EncodingRecord
453{
454  inline int cmp (const EncodingRecord &other) const
455  {
456    int ret;
457    ret = platformID.cmp (other.platformID);
458    if (ret) return ret;
459    ret = encodingID.cmp (other.encodingID);
460    if (ret) return ret;
461    return 0;
462  }
463
464  inline bool sanitize (hb_sanitize_context_t *c, void *base) {
465    TRACE_SANITIZE (this);
466    return TRACE_RETURN (c->check_struct (this) &&
467			 subtable.sanitize (c, base));
468  }
469
470  USHORT	platformID;	/* Platform ID. */
471  USHORT	encodingID;	/* Platform-specific encoding ID. */
472  OffsetTo<CmapSubtable, ULONG>
473		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
474  public:
475  DEFINE_SIZE_STATIC (8);
476};
477
478struct cmap
479{
480  static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
481
482  inline const CmapSubtable *find_subtable (unsigned int platform_id,
483					    unsigned int encoding_id) const
484  {
485    EncodingRecord key;
486    key.platformID.set (platform_id);
487    key.encodingID.set (encoding_id);
488
489    /* Note: We can use bsearch, but since it has no performance
490     * implications, we use lsearch and as such accept fonts with
491     * unsorted subtable list. */
492    int result = encodingRecord./*bsearch*/lsearch (key);
493    if (result == -1 || !encodingRecord[result].subtable)
494      return NULL;
495
496    return &(this+encodingRecord[result].subtable);
497  }
498
499  inline bool sanitize (hb_sanitize_context_t *c) {
500    TRACE_SANITIZE (this);
501    return TRACE_RETURN (c->check_struct (this) &&
502			 likely (version == 0) &&
503			 encodingRecord.sanitize (c, this));
504  }
505
506  USHORT		version;	/* Table version number (0). */
507  SortedArrayOf<EncodingRecord>
508			encodingRecord;	/* Encoding tables. */
509  public:
510  DEFINE_SIZE_ARRAY (4, encodingRecord);
511};
512
513
514} /* namespace OT */
515
516
517#endif /* HB_OT_CMAP_TABLE_HH */
518