hb-ot-cmap-table.hh revision b47159011ca518c3b94d782ed16a91ffe9dd2ab2
1/*
2 * Copyright © 2014  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type-private.hh"
31
32
33namespace OT {
34
35
36/*
37 * cmap -- Character To Glyph Index Mapping Table
38 */
39
40#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41
42
43struct CmapSubtableFormat0
44{
45  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46  {
47    hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48    if (!gid)
49      return false;
50    *glyph = gid;
51    return true;
52  }
53
54  inline bool sanitize (hb_sanitize_context_t *c) const
55  {
56    TRACE_SANITIZE (this);
57    return_trace (c->check_struct (this));
58  }
59
60  protected:
61  USHORT	format;		/* Format number is set to 0. */
62  USHORT	lengthZ;	/* Byte length of this subtable. */
63  USHORT	languageZ;	/* Ignore. */
64  BYTE		glyphIdArray[256];/* An array that maps character
65				 * code to glyph index values. */
66  public:
67  DEFINE_SIZE_STATIC (6 + 256);
68};
69
70struct CmapSubtableFormat4
71{
72  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
73  {
74    unsigned int segCount;
75    const USHORT *endCount;
76    const USHORT *startCount;
77    const USHORT *idDelta;
78    const USHORT *idRangeOffset;
79    const USHORT *glyphIdArray;
80    unsigned int glyphIdArrayLength;
81
82    segCount = this->segCountX2 / 2;
83    endCount = this->values;
84    startCount = endCount + segCount + 1;
85    idDelta = startCount + segCount;
86    idRangeOffset = idDelta + segCount;
87    glyphIdArray = idRangeOffset + segCount;
88    glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
89
90    /* Custom two-array bsearch. */
91    int min = 0, max = (int) segCount - 1;
92    unsigned int i;
93    while (min <= max)
94    {
95      int mid = (min + max) / 2;
96      if (codepoint < startCount[mid])
97        max = mid - 1;
98      else if (codepoint > endCount[mid])
99        min = mid + 1;
100      else
101      {
102	i = mid;
103	goto found;
104      }
105    }
106    return false;
107
108  found:
109    hb_codepoint_t gid;
110    unsigned int rangeOffset = idRangeOffset[i];
111    if (rangeOffset == 0)
112      gid = codepoint + idDelta[i];
113    else
114    {
115      /* Somebody has been smoking... */
116      unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
117      if (unlikely (index >= glyphIdArrayLength))
118	return false;
119      gid = glyphIdArray[index];
120      if (unlikely (!gid))
121	return false;
122      gid += idDelta[i];
123    }
124
125    *glyph = gid & 0xFFFFu;
126    return true;
127  }
128
129  inline bool sanitize (hb_sanitize_context_t *c) const
130  {
131    TRACE_SANITIZE (this);
132    if (unlikely (!c->check_struct (this)))
133      return_trace (false);
134
135    if (unlikely (!c->check_range (this, length)))
136    {
137      /* Some broken fonts have too long of a "length" value.
138       * If that is the case, just change the value to truncate
139       * the subtable at the end of the blob. */
140      uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
141					    (uintptr_t) (c->end -
142							 (char *) this));
143      if (!c->try_set (&length, new_length))
144	return_trace (false);
145    }
146
147    return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
148  }
149
150  protected:
151  USHORT	format;		/* Format number is set to 4. */
152  USHORT	length;		/* This is the length in bytes of the
153				 * subtable. */
154  USHORT	languageZ;	/* Ignore. */
155  USHORT	segCountX2;	/* 2 x segCount. */
156  USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
157  USHORT	entrySelectorZ;	/* log2(searchRange/2) */
158  USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
159
160  USHORT	values[VAR];
161#if 0
162  USHORT	endCount[segCount];	/* End characterCode for each segment,
163					 * last=0xFFFFu. */
164  USHORT	reservedPad;		/* Set to 0. */
165  USHORT	startCount[segCount];	/* Start character code for each segment. */
166  SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
167  USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
168  USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
169#endif
170
171  public:
172  DEFINE_SIZE_ARRAY (14, values);
173};
174
175struct CmapSubtableLongGroup
176{
177  friend struct CmapSubtableFormat12;
178  friend struct CmapSubtableFormat13;
179
180  int cmp (hb_codepoint_t codepoint) const
181  {
182    if (codepoint < startCharCode) return -1;
183    if (codepoint > endCharCode)   return +1;
184    return 0;
185  }
186
187  inline bool sanitize (hb_sanitize_context_t *c) const
188  {
189    TRACE_SANITIZE (this);
190    return_trace (c->check_struct (this));
191  }
192
193  private:
194  ULONG		startCharCode;	/* First character code in this group. */
195  ULONG		endCharCode;	/* Last character code in this group. */
196  ULONG		glyphID;	/* Glyph index; interpretation depends on
197				 * subtable format. */
198  public:
199  DEFINE_SIZE_STATIC (12);
200};
201
202template <typename UINT>
203struct CmapSubtableTrimmed
204{
205  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
206  {
207    /* Rely on our implicit array bound-checking. */
208    hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
209    if (!gid)
210      return false;
211    *glyph = gid;
212    return true;
213  }
214
215  inline bool sanitize (hb_sanitize_context_t *c) const
216  {
217    TRACE_SANITIZE (this);
218    return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
219  }
220
221  protected:
222  UINT		formatReserved;	/* Subtable format and (maybe) padding. */
223  UINT		lengthZ;	/* Byte length of this subtable. */
224  UINT		languageZ;	/* Ignore. */
225  UINT		startCharCode;	/* First character code covered. */
226  ArrayOf<GlyphID, UINT>
227		glyphIdArray;	/* Array of glyph index values for character
228				 * codes in the range. */
229  public:
230  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
231};
232
233struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
234struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
235
236template <typename T>
237struct CmapSubtableLongSegmented
238{
239  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
240  {
241    int i = groups.bsearch (codepoint);
242    if (i == -1)
243      return false;
244    *glyph = T::group_get_glyph (groups[i], codepoint);
245    return true;
246  }
247
248  inline bool sanitize (hb_sanitize_context_t *c) const
249  {
250    TRACE_SANITIZE (this);
251    return_trace (c->check_struct (this) && groups.sanitize (c));
252  }
253
254  protected:
255  USHORT	format;		/* Subtable format; set to 12. */
256  USHORT	reservedZ;	/* Reserved; set to 0. */
257  ULONG		lengthZ;	/* Byte length of this subtable. */
258  ULONG		languageZ;	/* Ignore. */
259  SortedArrayOf<CmapSubtableLongGroup, ULONG>
260		groups;		/* Groupings. */
261  public:
262  DEFINE_SIZE_ARRAY (16, groups);
263};
264
265struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
266{
267  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
268						hb_codepoint_t u)
269  { return group.glyphID + (u - group.startCharCode); }
270};
271
272struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
273{
274  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
275						hb_codepoint_t u HB_UNUSED)
276  { return group.glyphID; }
277};
278
/* Outcome of a variation-sequence lookup.  The enumerators rely on the
 * implicit numbering 0, 1, 2. */
typedef enum
{
  GLYPH_VARIANT_NOT_FOUND,	/* 0: no mapping for the sequence. */
  GLYPH_VARIANT_FOUND,		/* 1: a specific glyph was stored for
				 *    the sequence. */
  GLYPH_VARIANT_USE_DEFAULT	/* 2: the sequence is listed in a default
				 *    UVS table; no glyph is stored. */
} glyph_variant_t;
285
286struct UnicodeValueRange
287{
288  inline int cmp (const hb_codepoint_t &codepoint) const
289  {
290    if (codepoint < startUnicodeValue) return -1;
291    if (codepoint > startUnicodeValue + additionalCount) return +1;
292    return 0;
293  }
294
295  inline bool sanitize (hb_sanitize_context_t *c) const
296  {
297    TRACE_SANITIZE (this);
298    return_trace (c->check_struct (this));
299  }
300
301  UINT24	startUnicodeValue;	/* First value in this range. */
302  BYTE		additionalCount;	/* Number of additional values in this
303					 * range. */
304  public:
305  DEFINE_SIZE_STATIC (4);
306};
307
308typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
309
310struct UVSMapping
311{
312  inline int cmp (const hb_codepoint_t &codepoint) const
313  {
314    return unicodeValue.cmp (codepoint);
315  }
316
317  inline bool sanitize (hb_sanitize_context_t *c) const
318  {
319    TRACE_SANITIZE (this);
320    return_trace (c->check_struct (this));
321  }
322
323  UINT24	unicodeValue;	/* Base Unicode value of the UVS */
324  GlyphID	glyphID;	/* Glyph ID of the UVS */
325  public:
326  DEFINE_SIZE_STATIC (5);
327};
328
329typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
330
331struct VariationSelectorRecord
332{
333  inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
334				    hb_codepoint_t *glyph,
335				    const void *base) const
336  {
337    int i;
338    const DefaultUVS &defaults = base+defaultUVS;
339    i = defaults.bsearch (codepoint);
340    if (i != -1)
341      return GLYPH_VARIANT_USE_DEFAULT;
342    const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
343    i = nonDefaults.bsearch (codepoint);
344    if (i != -1)
345    {
346      *glyph = nonDefaults[i].glyphID;
347       return GLYPH_VARIANT_FOUND;
348    }
349    return GLYPH_VARIANT_NOT_FOUND;
350  }
351
352  inline int cmp (const hb_codepoint_t &variation_selector) const
353  {
354    return varSelector.cmp (variation_selector);
355  }
356
357  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
358  {
359    TRACE_SANITIZE (this);
360    return_trace (c->check_struct (this) &&
361		  defaultUVS.sanitize (c, base) &&
362		  nonDefaultUVS.sanitize (c, base));
363  }
364
365  UINT24	varSelector;	/* Variation selector. */
366  OffsetTo<DefaultUVS, ULONG>
367		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
368  OffsetTo<NonDefaultUVS, ULONG>
369		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
370  public:
371  DEFINE_SIZE_STATIC (11);
372};
373
374struct CmapSubtableFormat14
375{
376  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
377					    hb_codepoint_t variation_selector,
378					    hb_codepoint_t *glyph) const
379  {
380    return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
381  }
382
383  inline bool sanitize (hb_sanitize_context_t *c) const
384  {
385    TRACE_SANITIZE (this);
386    return_trace (c->check_struct (this) &&
387		  record.sanitize (c, this));
388  }
389
390  protected:
391  USHORT	format;		/* Format number is set to 0. */
392  ULONG		lengthZ;	/* Byte length of this subtable. */
393  SortedArrayOf<VariationSelectorRecord, ULONG>
394		record;		/* Variation selector records; sorted
395				 * in increasing order of `varSelector'. */
396  public:
397  DEFINE_SIZE_ARRAY (10, record);
398};
399
400struct CmapSubtable
401{
402  /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
403
404  inline bool get_glyph (hb_codepoint_t codepoint,
405			 hb_codepoint_t *glyph) const
406  {
407    switch (u.format) {
408    case  0: return u.format0 .get_glyph(codepoint, glyph);
409    case  4: return u.format4 .get_glyph(codepoint, glyph);
410    case  6: return u.format6 .get_glyph(codepoint, glyph);
411    case 10: return u.format10.get_glyph(codepoint, glyph);
412    case 12: return u.format12.get_glyph(codepoint, glyph);
413    case 13: return u.format13.get_glyph(codepoint, glyph);
414    case 14:
415    default: return false;
416    }
417  }
418
419  inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
420					    hb_codepoint_t variation_selector,
421					    hb_codepoint_t *glyph) const
422  {
423    switch (u.format) {
424    case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
425    default: return GLYPH_VARIANT_NOT_FOUND;
426    }
427  }
428
429  inline bool sanitize (hb_sanitize_context_t *c) const
430  {
431    TRACE_SANITIZE (this);
432    if (!u.format.sanitize (c)) return_trace (false);
433    switch (u.format) {
434    case  0: return_trace (u.format0 .sanitize (c));
435    case  4: return_trace (u.format4 .sanitize (c));
436    case  6: return_trace (u.format6 .sanitize (c));
437    case 10: return_trace (u.format10.sanitize (c));
438    case 12: return_trace (u.format12.sanitize (c));
439    case 13: return_trace (u.format13.sanitize (c));
440    case 14: return_trace (u.format14.sanitize (c));
441    default:return_trace (true);
442    }
443  }
444
445  protected:
446  union {
447  USHORT		format;		/* Format identifier */
448  CmapSubtableFormat0	format0;
449  CmapSubtableFormat4	format4;
450  CmapSubtableFormat6	format6;
451  CmapSubtableFormat10	format10;
452  CmapSubtableFormat12	format12;
453  CmapSubtableFormat13	format13;
454  CmapSubtableFormat14	format14;
455  } u;
456  public:
457  DEFINE_SIZE_UNION (2, format);
458};
459
460
461struct EncodingRecord
462{
463  inline int cmp (const EncodingRecord &other) const
464  {
465    int ret;
466    ret = platformID.cmp (other.platformID);
467    if (ret) return ret;
468    ret = encodingID.cmp (other.encodingID);
469    if (ret) return ret;
470    return 0;
471  }
472
473  inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
474  {
475    TRACE_SANITIZE (this);
476    return_trace (c->check_struct (this) &&
477		  subtable.sanitize (c, base));
478  }
479
480  USHORT	platformID;	/* Platform ID. */
481  USHORT	encodingID;	/* Platform-specific encoding ID. */
482  OffsetTo<CmapSubtable, ULONG>
483		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
484  public:
485  DEFINE_SIZE_STATIC (8);
486};
487
488struct cmap
489{
490  static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
491
492  inline const CmapSubtable *find_subtable (unsigned int platform_id,
493					    unsigned int encoding_id) const
494  {
495    EncodingRecord key;
496    key.platformID.set (platform_id);
497    key.encodingID.set (encoding_id);
498
499    /* Note: We can use bsearch, but since it has no performance
500     * implications, we use lsearch and as such accept fonts with
501     * unsorted subtable list. */
502    int result = encodingRecord./*bsearch*/lsearch (key);
503    if (result == -1 || !encodingRecord[result].subtable)
504      return NULL;
505
506    return &(this+encodingRecord[result].subtable);
507  }
508
509  inline bool sanitize (hb_sanitize_context_t *c) const
510  {
511    TRACE_SANITIZE (this);
512    return_trace (c->check_struct (this) &&
513		  likely (version == 0) &&
514		  encodingRecord.sanitize (c, this));
515  }
516
517  USHORT		version;	/* Table version number (0). */
518  SortedArrayOf<EncodingRecord>
519			encodingRecord;	/* Encoding tables. */
520  public:
521  DEFINE_SIZE_ARRAY (4, encodingRecord);
522};
523
524
525} /* namespace OT */
526
527
528#endif /* HB_OT_CMAP_TABLE_HH */
529