hb-unicode.cc revision 3f310dc0cae9015c45ba642b9b83d5695c807aad
1/*
2 * Copyright © 2009  Red Hat, Inc.
3 * Copyright © 2011  Codethink Limited
4 * Copyright © 2010,2011,2012  Google, Inc.
5 *
6 *  This is part of HarfBuzz, a text shaping library.
7 *
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
13 *
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * DAMAGE.
19 *
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 *
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
29 */
30
31#include "hb-private.hh"
32
33#include "hb-unicode-private.hh"
34
35
36
37/*
38 * hb_unicode_funcs_t
39 */
40
41static hb_unicode_combining_class_t
42hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
43				hb_codepoint_t      unicode   HB_UNUSED,
44				void               *user_data HB_UNUSED)
45{
46  return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
47}
48
49static unsigned int
50hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
51				hb_codepoint_t      unicode   HB_UNUSED,
52				void               *user_data HB_UNUSED)
53{
54  return 1;
55}
56
57static hb_unicode_general_category_t
58hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
59				 hb_codepoint_t      unicode   HB_UNUSED,
60				 void               *user_data HB_UNUSED)
61{
62  return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
63}
64
65static hb_codepoint_t
66hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
67			  hb_codepoint_t      unicode   HB_UNUSED,
68			  void               *user_data HB_UNUSED)
69{
70  return unicode;
71}
72
73static hb_script_t
74hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
75		       hb_codepoint_t      unicode   HB_UNUSED,
76		       void               *user_data HB_UNUSED)
77{
78  return HB_SCRIPT_UNKNOWN;
79}
80
81static hb_bool_t
82hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
83			hb_codepoint_t      a         HB_UNUSED,
84			hb_codepoint_t      b         HB_UNUSED,
85			hb_codepoint_t     *ab        HB_UNUSED,
86			void               *user_data HB_UNUSED)
87{
88  return false;
89}
90
91static hb_bool_t
92hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs    HB_UNUSED,
93			  hb_codepoint_t      ab        HB_UNUSED,
94			  hb_codepoint_t     *a         HB_UNUSED,
95			  hb_codepoint_t     *b         HB_UNUSED,
96			  void               *user_data HB_UNUSED)
97{
98  return false;
99}
100
101
102static unsigned int
103hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs     HB_UNUSED,
104					hb_codepoint_t      u          HB_UNUSED,
105					hb_codepoint_t     *decomposed HB_UNUSED,
106					void               *user_data  HB_UNUSED)
107{
108  return 0;
109}
110
111
112#define HB_UNICODE_FUNCS_IMPLEMENT_SET \
113  HB_UNICODE_FUNCS_IMPLEMENT (glib) \
114  HB_UNICODE_FUNCS_IMPLEMENT (icu) \
115  HB_UNICODE_FUNCS_IMPLEMENT (ucdn) \
116  HB_UNICODE_FUNCS_IMPLEMENT (nil) \
117  /* ^--- Add new callbacks before nil */
118
119#define hb_nil_get_unicode_funcs hb_unicode_funcs_get_empty
120
121/* Prototype them all */
122#define HB_UNICODE_FUNCS_IMPLEMENT(set) \
123extern "C" hb_unicode_funcs_t *hb_##set##_get_unicode_funcs (void);
124HB_UNICODE_FUNCS_IMPLEMENT_SET
125#undef HB_UNICODE_FUNCS_IMPLEMENT
126
127
128hb_unicode_funcs_t *
129hb_unicode_funcs_get_default (void)
130{
131#define HB_UNICODE_FUNCS_IMPLEMENT(set) \
132  return hb_##set##_get_unicode_funcs ();
133
134#ifdef HAVE_GLIB
135  HB_UNICODE_FUNCS_IMPLEMENT(glib)
136#elif defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
137  HB_UNICODE_FUNCS_IMPLEMENT(icu)
138#elif defined(HAVE_UCDN)
139  HB_UNICODE_FUNCS_IMPLEMENT(ucdn)
140#else
141#define HB_UNICODE_FUNCS_NIL 1
142  HB_UNICODE_FUNCS_IMPLEMENT(nil)
143#endif
144
145#undef HB_UNICODE_FUNCS_IMPLEMENT
146}
147
/* Build-time diagnostic: fires when hb_unicode_funcs_get_default() fell back
 * to the nil implementation (HB_UNICODE_FUNCS_NIL defined) and the builder
 * has not opted out with HB_NO_UNICODE_FUNCS.  MSVC has no #warning, so
 * #pragma message is used there; both variants carry the same text. */
#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
#ifdef _MSC_VER
#pragma message("Could not find any Unicode functions implementation, you have to provide your own")
#pragma message("To suppress this warning, define HB_NO_UNICODE_FUNCS")
#else
#warning "Could not find any Unicode functions implementation, you have to provide your own"
#warning "To suppress this warning, define HB_NO_UNICODE_FUNCS"
#endif
#endif
157
158/**
159 * hb_unicode_funcs_create: (Xconstructor)
160 * @parent: (allow-none):
161 *
162 *
163 *
164 * Return value: (transfer full):
165 *
166 * Since: 1.0
167 **/
168hb_unicode_funcs_t *
169hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
170{
171  hb_unicode_funcs_t *ufuncs;
172
173  if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
174    return hb_unicode_funcs_get_empty ();
175
176  if (!parent)
177    parent = hb_unicode_funcs_get_empty ();
178
179  hb_unicode_funcs_make_immutable (parent);
180  ufuncs->parent = hb_unicode_funcs_reference (parent);
181
182  ufuncs->func = parent->func;
183
184  /* We can safely copy user_data from parent since we hold a reference
185   * onto it and it's immutable.  We should not copy the destroy notifiers
186   * though. */
187  ufuncs->user_data = parent->user_data;
188
189  return ufuncs;
190}
191
192
193const hb_unicode_funcs_t _hb_unicode_funcs_nil = {
194  HB_OBJECT_HEADER_STATIC,
195
196  NULL, /* parent */
197  true, /* immutable */
198  {
199#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
200    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
201#undef HB_UNICODE_FUNC_IMPLEMENT
202  }
203};
204
205/**
206 * hb_unicode_funcs_get_empty:
207 *
208 *
209 *
210 * Return value: (transfer full):
211 *
212 * Since: 1.0
213 **/
214hb_unicode_funcs_t *
215hb_unicode_funcs_get_empty (void)
216{
217  return const_cast<hb_unicode_funcs_t *> (&_hb_unicode_funcs_nil);
218}
219
220/**
221 * hb_unicode_funcs_reference: (skip)
222 * @ufuncs: Unicode functions.
223 *
224 *
225 *
226 * Return value: (transfer full):
227 *
228 * Since: 1.0
229 **/
230hb_unicode_funcs_t *
231hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
232{
233  return hb_object_reference (ufuncs);
234}
235
236/**
237 * hb_unicode_funcs_destroy: (skip)
238 * @ufuncs: Unicode functions.
239 *
240 *
241 *
242 * Since: 1.0
243 **/
244void
245hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
246{
247  if (!hb_object_destroy (ufuncs)) return;
248
249#define HB_UNICODE_FUNC_IMPLEMENT(name) \
250  if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
251    HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
252#undef HB_UNICODE_FUNC_IMPLEMENT
253
254  hb_unicode_funcs_destroy (ufuncs->parent);
255
256  free (ufuncs);
257}
258
259/**
260 * hb_unicode_funcs_set_user_data: (skip)
261 * @ufuncs: Unicode functions.
262 * @key:
263 * @data:
264 * @destroy:
265 * @replace:
266 *
267 *
268 *
269 * Return value:
270 *
271 * Since: 1.0
272 **/
273hb_bool_t
274hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
275			        hb_user_data_key_t *key,
276			        void *              data,
277			        hb_destroy_func_t   destroy,
278				hb_bool_t           replace)
279{
280  return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
281}
282
283/**
284 * hb_unicode_funcs_get_user_data: (skip)
285 * @ufuncs: Unicode functions.
286 * @key:
287 *
288 *
289 *
290 * Return value: (transfer none):
291 *
292 * Since: 1.0
293 **/
294void *
295hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
296			        hb_user_data_key_t *key)
297{
298  return hb_object_get_user_data (ufuncs, key);
299}
300
301
302/**
303 * hb_unicode_funcs_make_immutable:
304 * @ufuncs: Unicode functions.
305 *
306 *
307 *
308 * Since: 1.0
309 **/
310void
311hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
312{
313  if (unlikely (hb_object_is_inert (ufuncs)))
314    return;
315
316  ufuncs->immutable = true;
317}
318
319/**
320 * hb_unicode_funcs_is_immutable:
321 * @ufuncs: Unicode functions.
322 *
323 *
324 *
325 * Return value:
326 *
327 * Since: 1.0
328 **/
329hb_bool_t
330hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
331{
332  return ufuncs->immutable;
333}
334
335/**
336 * hb_unicode_funcs_get_parent:
337 * @ufuncs: Unicode functions.
338 *
339 *
340 *
341 * Return value:
342 *
343 * Since: 1.0
344 **/
345hb_unicode_funcs_t *
346hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
347{
348  return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
349}
350
351
352#define HB_UNICODE_FUNC_IMPLEMENT(name)						\
353										\
354void										\
355hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t		   *ufuncs,	\
356				    hb_unicode_##name##_func_t	    func,	\
357				    void			   *user_data,	\
358				    hb_destroy_func_t		    destroy)	\
359{										\
360  if (ufuncs->immutable)							\
361    return;									\
362										\
363  if (ufuncs->destroy.name)							\
364    ufuncs->destroy.name (ufuncs->user_data.name);				\
365										\
366  if (func) {									\
367    ufuncs->func.name = func;							\
368    ufuncs->user_data.name = user_data;						\
369    ufuncs->destroy.name = destroy;						\
370  } else {									\
371    ufuncs->func.name = ufuncs->parent->func.name;				\
372    ufuncs->user_data.name = ufuncs->parent->user_data.name;			\
373    ufuncs->destroy.name = NULL;						\
374  }										\
375}
376
377HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
378#undef HB_UNICODE_FUNC_IMPLEMENT
379
380
381#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name)				\
382										\
383return_type									\
384hb_unicode_##name (hb_unicode_funcs_t *ufuncs,					\
385		   hb_codepoint_t      unicode)					\
386{										\
387  return ufuncs->name (unicode);						\
388}
389HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
390#undef HB_UNICODE_FUNC_IMPLEMENT
391
392/**
393 * hb_unicode_compose:
394 * @ufuncs: Unicode functions.
395 * @a:
396 * @b:
397 * @ab: (out):
398 *
399 *
400 *
401 * Return value:
402 *
403 * Since: 1.0
404 **/
405hb_bool_t
406hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
407		    hb_codepoint_t      a,
408		    hb_codepoint_t      b,
409		    hb_codepoint_t     *ab)
410{
411  return ufuncs->compose (a, b, ab);
412}
413
414/**
415 * hb_unicode_decompose:
416 * @ufuncs: Unicode functions.
417 * @ab:
418 * @a: (out):
419 * @b: (out):
420 *
421 *
422 *
423 * Return value:
424 *
425 * Since: 1.0
426 **/
427hb_bool_t
428hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
429		      hb_codepoint_t      ab,
430		      hb_codepoint_t     *a,
431		      hb_codepoint_t     *b)
432{
433  return ufuncs->decompose (ab, a, b);
434}
435
436/**
437 * hb_unicode_decompose_compatibility:
438 * @ufuncs: Unicode functions.
439 * @u:
440 * @decomposed: (out):
441 *
442 *
443 *
444 * Return value:
445 *
446 * Since: 1.0
447 **/
448unsigned int
449hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
450				    hb_codepoint_t      u,
451				    hb_codepoint_t     *decomposed)
452{
453  return ufuncs->decompose_compatibility (u, decomposed);
454}
455
456
457/* See hb-unicode-private.hh for details. */
458const uint8_t
459_hb_modified_combining_class[256] =
460{
461  0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
462  1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
463  2, 3, 4, 5, 6,
464  7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
465  8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
466  9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
467
468  /* Hebrew */
469  HB_MODIFIED_COMBINING_CLASS_CCC10,
470  HB_MODIFIED_COMBINING_CLASS_CCC11,
471  HB_MODIFIED_COMBINING_CLASS_CCC12,
472  HB_MODIFIED_COMBINING_CLASS_CCC13,
473  HB_MODIFIED_COMBINING_CLASS_CCC14,
474  HB_MODIFIED_COMBINING_CLASS_CCC15,
475  HB_MODIFIED_COMBINING_CLASS_CCC16,
476  HB_MODIFIED_COMBINING_CLASS_CCC17,
477  HB_MODIFIED_COMBINING_CLASS_CCC18,
478  HB_MODIFIED_COMBINING_CLASS_CCC19,
479  HB_MODIFIED_COMBINING_CLASS_CCC20,
480  HB_MODIFIED_COMBINING_CLASS_CCC21,
481  HB_MODIFIED_COMBINING_CLASS_CCC22,
482  HB_MODIFIED_COMBINING_CLASS_CCC23,
483  HB_MODIFIED_COMBINING_CLASS_CCC24,
484  HB_MODIFIED_COMBINING_CLASS_CCC25,
485  HB_MODIFIED_COMBINING_CLASS_CCC26,
486
487  /* Arabic */
488  HB_MODIFIED_COMBINING_CLASS_CCC27,
489  HB_MODIFIED_COMBINING_CLASS_CCC28,
490  HB_MODIFIED_COMBINING_CLASS_CCC29,
491  HB_MODIFIED_COMBINING_CLASS_CCC30,
492  HB_MODIFIED_COMBINING_CLASS_CCC31,
493  HB_MODIFIED_COMBINING_CLASS_CCC32,
494  HB_MODIFIED_COMBINING_CLASS_CCC33,
495  HB_MODIFIED_COMBINING_CLASS_CCC34,
496  HB_MODIFIED_COMBINING_CLASS_CCC35,
497
498  /* Syriac */
499  HB_MODIFIED_COMBINING_CLASS_CCC36,
500
501  37, 38, 39,
502  40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
503  60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
504  80, 81, 82, 83,
505
506  /* Telugu */
507  HB_MODIFIED_COMBINING_CLASS_CCC84,
508  85, 86, 87, 88, 89, 90,
509  HB_MODIFIED_COMBINING_CLASS_CCC91,
510  92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
511
512  /* Thai */
513  HB_MODIFIED_COMBINING_CLASS_CCC103,
514  104, 105, 106,
515  HB_MODIFIED_COMBINING_CLASS_CCC107,
516  108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
517
518  /* Lao */
519  HB_MODIFIED_COMBINING_CLASS_CCC118,
520  119, 120, 121,
521  HB_MODIFIED_COMBINING_CLASS_CCC122,
522  123, 124, 125, 126, 127, 128,
523
524  /* Tibetan */
525  HB_MODIFIED_COMBINING_CLASS_CCC129,
526  HB_MODIFIED_COMBINING_CLASS_CCC130,
527  131,
528  HB_MODIFIED_COMBINING_CLASS_CCC132,
529  133, 134, 135, 136, 137, 138, 139,
530
531
532  140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
533  150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
534  160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
535  170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
536  180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
537  190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
538
539  200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
540  201,
541  202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
542  203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
543  214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
544  215,
545  216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
546  217,
547  218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
548  219,
549  220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
550  221,
551  222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
552  223,
553  224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
554  225,
555  226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
556  227,
557  228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
558  229,
559  230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
560  231,
561  232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
562  233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
563  234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
564  235, 236, 237, 238, 239,
565  240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
566  241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
567  255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
568};
569