1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011,2012  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31#include "hb-version.h"
32
33#include "hb-mutex-private.hh"
34#include "hb-object-private.hh"
35
36#include <locale.h>
37
38
39/* hb_options_t */
40
41hb_options_union_t _hb_options;
42
43void
44_hb_options_init (void)
45{
46  hb_options_union_t u;
47  u.i = 0;
48  u.opts.initialized = 1;
49
50  char *c = getenv ("HB_OPTIONS");
51  u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible");
52
53  /* This is idempotent and threadsafe. */
54  _hb_options = u;
55}
56
57
58/* hb_tag_t */
59
60hb_tag_t
61hb_tag_from_string (const char *s, int len)
62{
63  char tag[4];
64  unsigned int i;
65
66  if (!s || !len || !*s)
67    return HB_TAG_NONE;
68
69  if (len < 0 || len > 4)
70    len = 4;
71  for (i = 0; i < (unsigned) len && s[i]; i++)
72    tag[i] = s[i];
73  for (; i < 4; i++)
74    tag[i] = ' ';
75
76  return HB_TAG_CHAR4 (tag);
77}
78
79void
80hb_tag_to_string (hb_tag_t tag, char *buf)
81{
82  buf[0] = (char) (uint8_t) (tag >> 24);
83  buf[1] = (char) (uint8_t) (tag >> 16);
84  buf[2] = (char) (uint8_t) (tag >>  8);
85  buf[3] = (char) (uint8_t) (tag >>  0);
86}
87
88
89/* hb_direction_t */
90
/* Direction names, indexed by (direction - HB_DIRECTION_LTR).
 * Each entry is three letters plus the terminating NUL. */
const char direction_strings[][4] = { "ltr", "rtl", "ttb", "btt" };
97
98hb_direction_t
99hb_direction_from_string (const char *str, int len)
100{
101  if (unlikely (!str || !len || !*str))
102    return HB_DIRECTION_INVALID;
103
104  /* Lets match loosely: just match the first letter, such that
105   * all of "ltr", "left-to-right", etc work!
106   */
107  char c = TOLOWER (str[0]);
108  for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
109    if (c == direction_strings[i][0])
110      return (hb_direction_t) (HB_DIRECTION_LTR + i);
111
112  return HB_DIRECTION_INVALID;
113}
114
115const char *
116hb_direction_to_string (hb_direction_t direction)
117{
118  if (likely ((unsigned int) (direction - HB_DIRECTION_LTR)
119	      < ARRAY_LENGTH (direction_strings)))
120    return direction_strings[direction - HB_DIRECTION_LTR];
121
122  return "invalid";
123}
124
125
126/* hb_language_t */
127
/* Object behind hb_language_t.  Language strings in this file are
 * cast to hb_language_t, and hb_language_to_string() hands back the
 * `s` member, so `s` marks the start of the string. */
struct hb_language_impl_t
{
  const char s[1];
};
131
/*
 * Byte canonicalization table for language strings.
 *
 *   - ASCII letters map to their lowercase form,
 *   - digits map to themselves,
 *   - '-', '@' and '_' map to '-',
 *   - every other byte (including 0x80..0xFF, which are left to
 *     zero-initialization) maps to 0.
 */
static const char canon_map[256] = {
  /* 0x00 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x08 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x18 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x28 */  0,   0,   0,   0,   0,  '-',  0,   0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
  /* 0x38 */ '8', '9',  0,   0,   0,   0,   0,   0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x48 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x58 */ 'x', 'y', 'z',  0,   0,   0,   0,  '-',
  /* 0x60 */  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x68 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x78 */ 'x', 'y', 'z',  0,   0,   0,   0,   0
};
142
143static hb_bool_t
144lang_equal (hb_language_t  v1,
145	    const void    *v2)
146{
147  const unsigned char *p1 = (const unsigned char *) v1;
148  const unsigned char *p2 = (const unsigned char *) v2;
149
150  while (*p1 && *p1 == canon_map[*p2])
151    p1++, p2++;
152
153  return *p1 == canon_map[*p2];
154}
155
#if 0
/*
 * Compiled out.  Hashes the canonical form of key: each byte is
 * mapped through canon_map and folded in as h = h * 31 + c, stopping
 * at the first byte that maps to 0.
 */
static unsigned int
lang_hash (const void *key)
{
  /* Explicit cast: converting from `const void *` is not implicit
   * in C++. */
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;

  for (; canon_map[*p]; p++)
    h = (h << 5) - h + canon_map[*p];

  return h;
}
#endif
171
172
173struct hb_language_item_t {
174
175  struct hb_language_item_t *next;
176  hb_language_t lang;
177
178  inline bool operator == (const char *s) const {
179    return lang_equal (lang, s);
180  }
181
182  inline hb_language_item_t & operator = (const char *s) {
183    lang = (hb_language_t) strdup (s);
184    for (unsigned char *p = (unsigned char *) lang; *p; p++)
185      *p = canon_map[*p];
186
187    return *this;
188  }
189
190  void finish (void) { free (lang); }
191};
192
193
194/* Thread-safe lock-free language list */
195
196static hb_language_item_t *langs;
197
198static inline
199void free_langs (void)
200{
201  while (langs) {
202    hb_language_item_t *next = langs->next;
203    langs->finish ();
204    free (langs);
205    langs = next;
206  }
207}
208
209static hb_language_item_t *
210lang_find_or_insert (const char *key)
211{
212retry:
213  hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs);
214
215  for (hb_language_item_t *lang = first_lang; lang; lang = lang->next)
216    if (*lang == key)
217      return lang;
218
219  /* Not found; allocate one. */
220  hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t));
221  if (unlikely (!lang))
222    return NULL;
223  lang->next = first_lang;
224  *lang = key;
225
226  if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) {
227    free (lang);
228    goto retry;
229  }
230
231#ifdef HAVE_ATEXIT
232  if (!first_lang)
233    atexit (free_langs); /* First person registers atexit() callback. */
234#endif
235
236  return lang;
237}
238
239
240hb_language_t
241hb_language_from_string (const char *str, int len)
242{
243  if (!str || !len || !*str)
244    return HB_LANGUAGE_INVALID;
245
246  char strbuf[32];
247  if (len >= 0) {
248    len = MIN (len, (int) sizeof (strbuf) - 1);
249    str = (char *) memcpy (strbuf, str, len);
250    strbuf[len] = '\0';
251  }
252
253  hb_language_item_t *item = lang_find_or_insert (str);
254
255  return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
256}
257
258const char *
259hb_language_to_string (hb_language_t language)
260{
261  /* This is actually NULL-safe! */
262  return language->s;
263}
264
265hb_language_t
266hb_language_get_default (void)
267{
268  static hb_language_t default_language = HB_LANGUAGE_INVALID;
269
270  hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language);
271  if (unlikely (language == HB_LANGUAGE_INVALID)) {
272    language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
273    hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language);
274  }
275
276  return default_language;
277}
278
279
280/* hb_script_t */
281
282hb_script_t
283hb_script_from_iso15924_tag (hb_tag_t tag)
284{
285  if (unlikely (tag == HB_TAG_NONE))
286    return HB_SCRIPT_INVALID;
287
288  /* Be lenient, adjust case (one capital letter followed by three small letters) */
289  tag = (tag & 0xDFDFDFDF) | 0x00202020;
290
291  switch (tag) {
292
293    /* These graduated from the 'Q' private-area codes, but
294     * the old code is still aliased by Unicode, and the Qaai
295     * one in use by ICU. */
296    case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
297    case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
298
299    /* Script variants from http://unicode.org/iso15924/ */
300    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
301    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
302    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
303    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
304    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
305    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
306  }
307
308  /* If it looks right, just use the tag as a script */
309  if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
310    return (hb_script_t) tag;
311
312  /* Otherwise, return unknown */
313  return HB_SCRIPT_UNKNOWN;
314}
315
316hb_script_t
317hb_script_from_string (const char *s, int len)
318{
319  return hb_script_from_iso15924_tag (hb_tag_from_string (s, len));
320}
321
322hb_tag_t
323hb_script_to_iso15924_tag (hb_script_t script)
324{
325  return (hb_tag_t) script;
326}
327
328hb_direction_t
329hb_script_get_horizontal_direction (hb_script_t script)
330{
331  /* http://goo.gl/x9ilM */
332  switch ((hb_tag_t) script)
333  {
334    /* Unicode-1.1 additions */
335    case HB_SCRIPT_ARABIC:
336    case HB_SCRIPT_HEBREW:
337
338    /* Unicode-3.0 additions */
339    case HB_SCRIPT_SYRIAC:
340    case HB_SCRIPT_THAANA:
341
342    /* Unicode-4.0 additions */
343    case HB_SCRIPT_CYPRIOT:
344
345    /* Unicode-4.1 additions */
346    case HB_SCRIPT_KHAROSHTHI:
347
348    /* Unicode-5.0 additions */
349    case HB_SCRIPT_PHOENICIAN:
350    case HB_SCRIPT_NKO:
351
352    /* Unicode-5.1 additions */
353    case HB_SCRIPT_LYDIAN:
354
355    /* Unicode-5.2 additions */
356    case HB_SCRIPT_AVESTAN:
357    case HB_SCRIPT_IMPERIAL_ARAMAIC:
358    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
359    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
360    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
361    case HB_SCRIPT_OLD_TURKIC:
362    case HB_SCRIPT_SAMARITAN:
363
364    /* Unicode-6.0 additions */
365    case HB_SCRIPT_MANDAIC:
366
367    /* Unicode-6.1 additions */
368    case HB_SCRIPT_MEROITIC_CURSIVE:
369    case HB_SCRIPT_MEROITIC_HIEROGLYPHS:
370
371      return HB_DIRECTION_RTL;
372  }
373
374  return HB_DIRECTION_LTR;
375}
376
377
378/* hb_user_data_array_t */
379
380bool
381hb_user_data_array_t::set (hb_user_data_key_t *key,
382			   void *              data,
383			   hb_destroy_func_t   destroy,
384			   hb_bool_t           replace)
385{
386  if (!key)
387    return false;
388
389  if (replace) {
390    if (!data && !destroy) {
391      items.remove (key, lock);
392      return true;
393    }
394  }
395  hb_user_data_item_t item = {key, data, destroy};
396  bool ret = !!items.replace_or_insert (item, lock, replace);
397
398  return ret;
399}
400
401void *
402hb_user_data_array_t::get (hb_user_data_key_t *key)
403{
404  hb_user_data_item_t item = {NULL };
405
406  return items.find (key, &item, lock) ? item.data : NULL;
407}
408
409
410/* hb_version */
411
412void
413hb_version (unsigned int *major,
414	    unsigned int *minor,
415	    unsigned int *micro)
416{
417  *major = HB_VERSION_MAJOR;
418  *minor = HB_VERSION_MINOR;
419  *micro = HB_VERSION_MICRO;
420}
421
422const char *
423hb_version_string (void)
424{
425  return HB_VERSION_STRING;
426}
427
428hb_bool_t
429hb_version_check (unsigned int major,
430		  unsigned int minor,
431		  unsigned int micro)
432{
433  return HB_VERSION_CHECK (major, minor, micro);
434}
435