1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011,2012  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31#include "hb-mutex-private.hh"
32#include "hb-object-private.hh"
33
34#include <locale.h>
35
36
37/* hb_options_t */
38
39hb_options_union_t _hb_options;
40
41void
42_hb_options_init (void)
43{
44  hb_options_union_t u;
45  u.i = 0;
46  u.opts.initialized = 1;
47
48  char *c = getenv ("HB_OPTIONS");
49  u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
50
51  /* This is idempotent and threadsafe. */
52  _hb_options = u;
53}
54
55
56/* hb_tag_t */
57
58/**
59 * hb_tag_from_string:
60 * @str: (array length=len) (element-type uint8_t):
61 * @len:
62 *
63 *
64 *
65 * Return value:
66 *
67 * Since: 0.9.2
68 **/
69hb_tag_t
70hb_tag_from_string (const char *str, int len)
71{
72  char tag[4];
73  unsigned int i;
74
75  if (!str || !len || !*str)
76    return HB_TAG_NONE;
77
78  if (len < 0 || len > 4)
79    len = 4;
80  for (i = 0; i < (unsigned) len && str[i]; i++)
81    tag[i] = str[i];
82  for (; i < 4; i++)
83    tag[i] = ' ';
84
85  return HB_TAG_CHAR4 (tag);
86}
87
88/**
89 * hb_tag_to_string:
90 * @tag:
91 * @buf: (array fixed-size=4):
92 *
93 *
94 *
95 * Since: 0.9.5
96 **/
97void
98hb_tag_to_string (hb_tag_t tag, char *buf)
99{
100  buf[0] = (char) (uint8_t) (tag >> 24);
101  buf[1] = (char) (uint8_t) (tag >> 16);
102  buf[2] = (char) (uint8_t) (tag >>  8);
103  buf[3] = (char) (uint8_t) (tag >>  0);
104}
105
106
107/* hb_direction_t */
108
/* Names of the four directions, indexed by (direction - HB_DIRECTION_LTR). */
const char direction_strings[][4] = { "ltr", "rtl", "ttb", "btt" };
115
116/**
117 * hb_direction_from_string:
118 * @str: (array length=len) (element-type uint8_t):
119 * @len:
120 *
121 *
122 *
123 * Return value:
124 *
125 * Since: 0.9.2
126 **/
127hb_direction_t
128hb_direction_from_string (const char *str, int len)
129{
130  if (unlikely (!str || !len || !*str))
131    return HB_DIRECTION_INVALID;
132
133  /* Lets match loosely: just match the first letter, such that
134   * all of "ltr", "left-to-right", etc work!
135   */
136  char c = TOLOWER (str[0]);
137  for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
138    if (c == direction_strings[i][0])
139      return (hb_direction_t) (HB_DIRECTION_LTR + i);
140
141  return HB_DIRECTION_INVALID;
142}
143
144/**
145 * hb_direction_to_string:
146 * @direction:
147 *
148 *
149 *
150 * Return value: (transfer none):
151 *
152 * Since: 0.9.2
153 **/
154const char *
155hb_direction_to_string (hb_direction_t direction)
156{
157  if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
158	      < ARRAY_LENGTH (direction_strings)))
159    return direction_strings[direction - HB_DIRECTION_LTR];
160
161  return "invalid";
162}
163
164
165/* hb_language_t */
166
/* Type behind language handles. Its only member, s, is what
 * hb_language_to_string() returns; the handles created in this file are
 * strdup()'ed strings cast to hb_language_t. */
struct hb_language_impl_t
{
  const char s[1];
};
170
/* Byte-to-canonical-character table used to normalize language strings.
 * Digits map to themselves; ASCII letters of either case map to their
 * lowercase form; '-', '@' and '_' all map to '-'. Every other byte value
 * (including the 128 entries not listed here, which are zero-initialized)
 * maps to 0. */
static const char canon_map[256] = {
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
181
182static bool
183lang_equal (hb_language_t  v1,
184	    const void    *v2)
185{
186  const unsigned char *p1 = (const unsigned char *) v1;
187  const unsigned char *p2 = (const unsigned char *) v2;
188
189  while (*p1 && *p1 == canon_map[*p2])
190    p1++, p2++;
191
192  return *p1 == canon_map[*p2];
193}
194
#if 0
/* Compiled out. Hashes a language key over its canon_map-mapped bytes,
 * stopping at the first byte that maps to 0. */
static unsigned int
lang_hash (const void *key)
{
  const unsigned char *p = key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      /* (h << 5) - h is h * 31. */
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
210
211
212struct hb_language_item_t {
213
214  struct hb_language_item_t *next;
215  hb_language_t lang;
216
217  inline bool operator == (const char *s) const {
218    return lang_equal (lang, s);
219  }
220
221  inline hb_language_item_t & operator = (const char *s) {
222    lang = (hb_language_t) strdup (s);
223    for (unsigned char *p = (unsigned char *) lang; *p; p++)
224      *p = canon_map[*p];
225
226    return *this;
227  }
228
229  void finish (void) { free ((void *) lang); }
230};
231
232
233/* Thread-safe lock-free language list */
234
235static hb_language_item_t *langs;
236
237#ifdef HB_USE_ATEXIT
238static
239void free_langs (void)
240{
241  while (langs) {
242    hb_language_item_t *next = langs->next;
243    langs->finish ();
244    free (langs);
245    langs = next;
246  }
247}
248#endif
249
250static hb_language_item_t *
251lang_find_or_insert (const char *key)
252{
253retry:
254  hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
255
256  for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
257    if (*lang == key)
258      return lang;
259
260  /* Not found; allocate one. */
261  hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
262  if (unlikely (!lang))
263    return NULL;
264  lang->next = first_lang;
265  *lang = key;
266
267  if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
268    lang->finish ();
269    free (lang);
270    goto retry;
271  }
272
273#ifdef HB_USE_ATEXIT
274  if (!first_lang)
275    atexit (free_langs); /* First person registers atexit() callback. */
276#endif
277
278  return lang;
279}
280
281
282/**
283 * hb_language_from_string:
284 * @str: (array length=len) (element-type uint8_t): a string representing
285 *       ISO 639 language code
286 * @len: length of the @str, or -1 if it is %NULL-terminated.
287 *
288 * Converts @str representing an ISO 639 language code to the corresponding
289 * #hb_language_t.
290 *
291 * Return value: (transfer none):
292 * The #hb_language_t corresponding to the ISO 639 language code.
293 *
294 * Since: 0.9.2
295 **/
296hb_language_t
297hb_language_from_string (const char *str, int len)
298{
299  if (!str || !len || !*str)
300    return HB_LANGUAGE_INVALID;
301
302  hb_language_item_t *item = NULL;
303  if (len >= 0)
304  {
305    /* NUL-terminate it. */
306    char strbuf[64];
307    len = MIN (len, (int) sizeof (strbuf) - 1);
308    memcpy (strbuf, str, len);
309    strbuf[len] = '\0';
310    item = lang_find_or_insert (strbuf);
311  }
312  else
313    item = lang_find_or_insert (str);
314
315  return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
316}
317
318/**
319 * hb_language_to_string:
320 * @language: an #hb_language_t to convert.
321 *
322 * See hb_language_from_string().
323 *
324 * Return value: (transfer none):
325 * A %NULL-terminated string representing the @language. Must not be freed by
326 * the caller.
327 *
328 * Since: 0.9.2
329 **/
330const char *
331hb_language_to_string (hb_language_t language)
332{
333  /* This is actually NULL-safe! */
334  return language->s;
335}
336
337/**
338 * hb_language_get_default:
339 *
340 *
341 *
342 * Return value: (transfer none):
343 *
344 * Since: 0.9.2
345 **/
346hb_language_t
347hb_language_get_default (void)
348{
349  static hb_language_t default_language = HB_LANGUAGE_INVALID;
350
351  hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
352  if (unlikely (language == HB_LANGUAGE_INVALID)) {
353    language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
354    (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
355  }
356
357  return default_language;
358}
359
360
361/* hb_script_t */
362
363/**
364 * hb_script_from_iso15924_tag:
365 * @tag: an #hb_tag_t representing an ISO 15924 tag.
366 *
367 * Converts an ISO 15924 script tag to a corresponding #hb_script_t.
368 *
369 * Return value:
370 * An #hb_script_t corresponding to the ISO 15924 tag.
371 *
372 * Since: 0.9.2
373 **/
374hb_script_t
375hb_script_from_iso15924_tag (hb_tag_t tag)
376{
377  if (unlikely (tag == HB_TAG_NONE))
378    return HB_SCRIPT_INVALID;
379
380  /* Be lenient, adjust case (one capital letter followed by three small letters) */
381  tag = (tag & 0xDFDFDFDFu) | 0x00202020u;
382
383  switch (tag) {
384
385    /* These graduated from the 'Q' private-area codes, but
386     * the old code is still aliased by Unicode, and the Qaai
387     * one in use by ICU. */
388    case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
389    case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
390
391    /* Script variants from http://unicode.org/iso15924/ */
392    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
393    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
394    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
395    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
396    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
397    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
398  }
399
400  /* If it looks right, just use the tag as a script */
401  if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u)
402    return (hb_script_t) tag;
403
404  /* Otherwise, return unknown */
405  return HB_SCRIPT_UNKNOWN;
406}
407
408/**
409 * hb_script_from_string:
410 * @str: (array length=len) (element-type uint8_t): a string representing an
411 *       ISO 15924 tag.
412 * @len: length of the @str, or -1 if it is %NULL-terminated.
413 *
414 * Converts a string @str representing an ISO 15924 script tag to a
415 * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then
416 * hb_script_from_iso15924_tag().
417 *
418 * Return value:
419 * An #hb_script_t corresponding to the ISO 15924 tag.
420 *
421 * Since: 0.9.2
422 **/
423hb_script_t
424hb_script_from_string (const char *str, int len)
425{
426  return hb_script_from_iso15924_tag (hb_tag_from_string (str, len));
427}
428
429/**
430 * hb_script_to_iso15924_tag:
431 * @script: an #hb_script_ to convert.
432 *
433 * See hb_script_from_iso15924_tag().
434 *
435 * Return value:
436 * An #hb_tag_t representing an ISO 15924 script tag.
437 *
438 * Since: 0.9.2
439 **/
440hb_tag_t
441hb_script_to_iso15924_tag (hb_script_t script)
442{
443  return (hb_tag_t) script;
444}
445
446/**
447 * hb_script_get_horizontal_direction:
448 * @script:
449 *
450 *
451 *
452 * Return value:
453 *
454 * Since: 0.9.2
455 **/
456hb_direction_t
457hb_script_get_horizontal_direction (hb_script_t script)
458{
459  /* http://goo.gl/x9ilM */
460  switch ((hb_tag_t) script)
461  {
462    /* Unicode-1.1 additions */
463    case HB_SCRIPT_ARABIC:
464    case HB_SCRIPT_HEBREW:
465
466    /* Unicode-3.0 additions */
467    case HB_SCRIPT_SYRIAC:
468    case HB_SCRIPT_THAANA:
469
470    /* Unicode-4.0 additions */
471    case HB_SCRIPT_CYPRIOT:
472
473    /* Unicode-4.1 additions */
474    case HB_SCRIPT_KHAROSHTHI:
475
476    /* Unicode-5.0 additions */
477    case HB_SCRIPT_PHOENICIAN:
478    case HB_SCRIPT_NKO:
479
480    /* Unicode-5.1 additions */
481    case HB_SCRIPT_LYDIAN:
482
483    /* Unicode-5.2 additions */
484    case HB_SCRIPT_AVESTAN:
485    case HB_SCRIPT_IMPERIAL_ARAMAIC:
486    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
487    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
488    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
489    case HB_SCRIPT_OLD_TURKIC:
490    case HB_SCRIPT_SAMARITAN:
491
492    /* Unicode-6.0 additions */
493    case HB_SCRIPT_MANDAIC:
494
495    /* Unicode-6.1 additions */
496    case HB_SCRIPT_MEROITIC_CURSIVE:
497    case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
498
499    /* Unicode-7.0 additions */
500    case HB_SCRIPT_MANICHAEAN:
501    case HB_SCRIPT_MENDE_KIKAKUI:
502    case HB_SCRIPT_NABATAEAN:
503    case HB_SCRIPT_OLD_NORTH_ARABIAN:
504    case HB_SCRIPT_PALMYRENE:
505    case HB_SCRIPT_PSALTER_PAHLAVI:
506
507    /* Unicode-8.0 additions */
508    case HB_SCRIPT_OLD_HUNGARIAN:
509
510      return HB_DIRECTION_RTL;
511  }
512
513  return HB_DIRECTION_LTR;
514}
515
516
517/* hb_user_data_array_t */
518
519bool
520hb_user_data_array_t::set (hb_user_data_key_t *key,
521			   void *              data,
522			   hb_destroy_func_t   destroy,
523			   hb_bool_t           replace)
524{
525  if (!key)
526    return false;
527
528  if (replace) {
529    if (!data && !destroy) {
530      items.remove (key, lock);
531      return true;
532    }
533  }
534  hb_user_data_item_t item = {key, data, destroy};
535  bool ret = !!items.replace_or_insert (item, lock, (bool) replace);
536
537  return ret;
538}
539
540void *
541hb_user_data_array_t::get (hb_user_data_key_t *key)
542{
543  hb_user_data_item_t item = {NULL, NULL, NULL};
544
545  return items.find (key, &item, lock) ? item.data : NULL;
546}
547
548
549/* hb_version */
550
551/**
552 * hb_version:
553 * @major: (out): Library major version component.
554 * @minor: (out): Library minor version component.
555 * @micro: (out): Library micro version component.
556 *
557 * Returns library version as three integer components.
558 *
559 * Since: 0.9.2
560 **/
561void
562hb_version (unsigned int *major,
563	    unsigned int *minor,
564	    unsigned int *micro)
565{
566  *major = HB_VERSION_MAJOR;
567  *minor = HB_VERSION_MINOR;
568  *micro = HB_VERSION_MICRO;
569}
570
571/**
572 * hb_version_string:
573 *
574 * Returns library version as a string with three components.
575 *
576 * Return value: library version string.
577 *
578 * Since: 0.9.2
579 **/
580const char *
581hb_version_string (void)
582{
583  return HB_VERSION_STRING;
584}
585
586/**
587 * hb_version_atleast:
588 * @major:
589 * @minor:
590 * @micro:
591 *
592 *
593 *
594 * Return value:
595 *
596 * Since: 0.9.30
597 **/
598hb_bool_t
599hb_version_atleast (unsigned int major,
600		    unsigned int minor,
601		    unsigned int micro)
602{
603  return HB_VERSION_ATLEAST (major, minor, micro);
604}
605