hb-ot-cmap-table.hh revision c95587618c88d187be64f923033dae151cf820be
1/*
2 * Copyright © 2014  Google, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type-private.hh"
31
32
33namespace OT {
34
35
36/*
37 * cmap -- Character To Glyph Index Mapping Table
38 */
39
40#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41
42
43struct CmapSubtableFormat0
44{
45  friend struct CmapSubtable;
46
47  private:
48  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
49  {
50    hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
51    if (!gid)
52      return false;
53    *glyph = gid;
54    return true;
55  }
56
57  inline bool sanitize (hb_sanitize_context_t *c) {
58    TRACE_SANITIZE (this);
59    return TRACE_RETURN (c->check_struct (this));
60  }
61
62  protected:
63  USHORT	format;		/* Format number is set to 0. */
64  USHORT	length;		/* Byte length of this subtable. */
65  USHORT	language;	/* Ignore. */
66  BYTE		glyphIdArray[256];/* An array that maps character
67				 * code to glyph index values. */
68  public:
69  DEFINE_SIZE_STATIC (6 + 256);
70};
71
72struct CmapSubtableFormat4
73{
74  friend struct CmapSubtable;
75
76  private:
77  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
78  {
79    unsigned int segCount;
80    const USHORT *endCount;
81    const USHORT *startCount;
82    const USHORT *idDelta;
83    const USHORT *idRangeOffset;
84    const USHORT *glyphIdArray;
85    unsigned int glyphIdArrayLength;
86
87    segCount = this->segCountX2 / 2;
88    endCount = this->values;
89    startCount = endCount + segCount + 1;
90    idDelta = startCount + segCount;
91    idRangeOffset = idDelta + segCount;
92    glyphIdArray = idRangeOffset + segCount;
93    glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
94
95    /* Custom bsearch. */
96    int min = 0, max = (int) segCount - 1;
97    unsigned int i;
98    while (min <= max)
99    {
100      int mid = (min + max) / 2;
101      if (codepoint < startCount[mid])
102        max = mid - 1;
103      else if (codepoint > endCount[mid])
104        min = mid + 1;
105      else
106      {
107	i = mid;
108	goto found;
109      }
110    }
111    return false;
112
113  found:
114    hb_codepoint_t gid;
115    unsigned int rangeOffset = idRangeOffset[i];
116    if (rangeOffset == 0)
117      gid = codepoint + idDelta[i];
118    else
119    {
120      /* Somebody has been smoking... */
121      unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
122      if (unlikely (index >= glyphIdArrayLength))
123	return false;
124      gid = glyphIdArray[index];
125      if (unlikely (!gid))
126	return false;
127      gid += idDelta[i];
128    }
129
130    *glyph = gid & 0xFFFF;
131    return true;
132  }
133
134  inline bool sanitize (hb_sanitize_context_t *c) {
135    TRACE_SANITIZE (this);
136    return TRACE_RETURN (c->check_struct (this) &&
137			 c->check_range (this, length) &&
138			 16 + 4 * (unsigned int) segCountX2 < length);
139  }
140
141  protected:
142  USHORT	format;		/* Format number is set to 4. */
143  USHORT	length;		/* This is the length in bytes of the
144				 * subtable. */
145  USHORT	language;	/* Ignore. */
146  USHORT	segCountX2;	/* 2 x segCount. */
147  USHORT	searchRange;	/* 2 * (2**floor(log2(segCount))) */
148  USHORT	entrySelector;	/* log2(searchRange/2) */
149  USHORT	rangeShift;	/* 2 x segCount - searchRange */
150
151  USHORT	values[VAR];
152#if 0
153  USHORT	endCount[segCount];	/* End characterCode for each segment,
154					 * last=0xFFFF. */
155  USHORT	reservedPad;		/* Set to 0. */
156  USHORT	startCount[segCount];	/* Start character code for each segment. */
157  SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
158  USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
159  USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
160#endif
161
162  public:
163  DEFINE_SIZE_ARRAY (14, values);
164};
165
166struct CmapSubtableLongGroup
167{
168  friend struct CmapSubtableFormat12;
169  friend struct CmapSubtableFormat13;
170
171  int cmp (hb_codepoint_t codepoint) const
172  {
173    if (codepoint < startCharCode) return -1;
174    if (codepoint > endCharCode)   return +1;
175    return 0;
176  }
177
178  inline bool sanitize (hb_sanitize_context_t *c) {
179    TRACE_SANITIZE (this);
180    return TRACE_RETURN (c->check_struct (this));
181  }
182
183  private:
184  ULONG		startCharCode;	/* First character code in this group. */
185  ULONG		endCharCode;	/* Last character code in this group. */
186  ULONG		glyphID;	/* Glyph index; interpretation depends on
187				 * subtable format. */
188  public:
189  DEFINE_SIZE_STATIC (12);
190};
191
192template <typename UINT>
193struct CmapSubtableTrimmed
194{
195  friend struct CmapSubtable;
196
197  private:
198  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
199  {
200    /* Rely on our implicit array bound-checking. */
201    hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
202    if (!gid)
203      return false;
204    *glyph = gid;
205    return true;
206  }
207
208  inline bool sanitize (hb_sanitize_context_t *c) {
209    TRACE_SANITIZE (this);
210    return TRACE_RETURN (c->check_struct (this) && glyphIdArray.sanitize (c));
211  }
212
213  protected:
214  UINT		formatReserved;	/* Subtable format and (maybe) padding. */
215  UINT		length;		/* Byte length of this subtable. */
216  UINT		language;	/* Ignore. */
217  UINT		startCharCode;	/* First character code covered. */
218  GenericArrayOf<UINT, GlyphID>
219		glyphIdArray;	/* Array of glyph index values for character
220				 * codes in the range. */
221  public:
222  DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
223};
224
/* Formats 6 and 10 share the trimmed-array layout and differ only in the
 * integer type used for the header fields and the array count:
 * USHORT for format 6, ULONG for format 10. */
struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
227
228template <typename T>
229struct CmapSubtableLongSegmented
230{
231  friend struct CmapSubtable;
232
233  private:
234  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
235  {
236    int i = groups.search (codepoint);
237    if (i == -1)
238      return false;
239    *glyph = T::group_get_glyph (groups[i], codepoint);
240    return true;
241  }
242
243  inline bool sanitize (hb_sanitize_context_t *c) {
244    TRACE_SANITIZE (this);
245    return TRACE_RETURN (c->check_struct (this) && groups.sanitize (c));
246  }
247
248  protected:
249  USHORT	format;		/* Subtable format; set to 12. */
250  USHORT	reserved;	/* Reserved; set to 0. */
251  ULONG		length;		/* Byte length of this subtable. */
252  ULONG		language;	/* Ignore. */
253  LongArrayOf<CmapSubtableLongGroup>
254		groups;		/* Groupings. */
255  public:
256  DEFINE_SIZE_ARRAY (16, groups);
257};
258
259struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
260{
261  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
262						hb_codepoint_t u)
263  { return group.glyphID + (u - group.startCharCode); }
264};
265
266struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
267{
268  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
269						hb_codepoint_t u HB_UNUSED)
270  { return group.glyphID; }
271};
272
273struct CmapSubtable
274{
275  /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
276
277  inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
278  {
279    switch (u.format) {
280    case  0: return u.format0 .get_glyph(codepoint, glyph);
281    case  4: return u.format4 .get_glyph(codepoint, glyph);
282    case  6: return u.format6 .get_glyph(codepoint, glyph);
283    case 10: return u.format10.get_glyph(codepoint, glyph);
284    case 12: return u.format12.get_glyph(codepoint, glyph);
285    case 13: return u.format13.get_glyph(codepoint, glyph);
286    default:return false;
287    }
288  }
289
290  inline bool sanitize (hb_sanitize_context_t *c) {
291    TRACE_SANITIZE (this);
292    if (!u.format.sanitize (c)) return TRACE_RETURN (false);
293    switch (u.format) {
294    case  0: return TRACE_RETURN (u.format0 .sanitize (c));
295    case  4: return TRACE_RETURN (u.format4 .sanitize (c));
296    case  6: return TRACE_RETURN (u.format6 .sanitize (c));
297    case 10: return TRACE_RETURN (u.format10.sanitize (c));
298    case 12: return TRACE_RETURN (u.format12.sanitize (c));
299    case 13: return TRACE_RETURN (u.format13.sanitize (c));
300    default:return TRACE_RETURN (true);
301    }
302  }
303
304  protected:
305  union {
306  USHORT		format;		/* Format identifier */
307  CmapSubtableFormat0	format0;
308  CmapSubtableFormat4	format4;
309  CmapSubtableFormat6	format6;
310  CmapSubtableFormat10	format10;
311  CmapSubtableFormat12	format12;
312  CmapSubtableFormat13	format13;
313  } u;
314  public:
315  DEFINE_SIZE_UNION (2, format);
316};
317
318
319struct EncodingRecord
320{
321  int cmp (const EncodingRecord &other) const
322  {
323    int ret;
324    ret = other.platformID.cmp (platformID);
325    if (ret) return ret;
326    ret = other.encodingID.cmp (encodingID);
327    if (ret) return ret;
328    return 0;
329  }
330
331  inline bool sanitize (hb_sanitize_context_t *c, void *base) {
332    TRACE_SANITIZE (this);
333    return TRACE_RETURN (c->check_struct (this) &&
334			 subtable.sanitize (c, base));
335  }
336
337  USHORT	platformID;	/* Platform ID. */
338  USHORT	encodingID;	/* Platform-specific encoding ID. */
339  LongOffsetTo<CmapSubtable>
340		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
341  public:
342  DEFINE_SIZE_STATIC (8);
343};
344
345struct cmap
346{
347  static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
348
349  inline const CmapSubtable *find_subtable (unsigned int platform_id,
350					    unsigned int encoding_id) const
351  {
352    EncodingRecord key;
353    key.platformID.set (platform_id);
354    key.encodingID.set (encoding_id);
355
356    int result = encodingRecord.search (key);
357    if (result == -1)
358      return NULL;
359
360    return &(this+encodingRecord[result].subtable);
361  }
362
363  inline bool sanitize (hb_sanitize_context_t *c) {
364    TRACE_SANITIZE (this);
365    return TRACE_RETURN (c->check_struct (this) &&
366			 likely (version == 0) &&
367			 encodingRecord.sanitize (c, this));
368  }
369
370  USHORT			version;	/* Table version number (0). */
371  ArrayOf<EncodingRecord>	encodingRecord;	/* Encoding tables. */
372  public:
373  DEFINE_SIZE_ARRAY (4, encodingRecord);
374};
375
376
377} /* namespace OT */
378
379
380#endif /* HB_OT_CMAP_TABLE_HH */
381