hb-common.cc revision 50e810cd0e55c25fddb0a2fd0861c51fbf65700e
1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31#include "hb-version.h"
32
33#include "hb-mutex-private.hh"
34#include "hb-object-private.hh"
35
36#include <locale.h>
37
38
39
40/* hb_tag_t */
41
42hb_tag_t
43hb_tag_from_string (const char *s, int len)
44{
45  char tag[4];
46  unsigned int i;
47
48  if (!s || !len || !*s)
49    return HB_TAG_NONE;
50
51  if (len < 0 || len > 4)
52    len = 4;
53  for (i = 0; i < (unsigned) len && s[i]; i++)
54    tag[i] = s[i];
55  for (; i < 4; i++)
56    tag[i] = ' ';
57
58  return HB_TAG_CHAR4 (tag);
59}
60
61
62/* hb_direction_t */
63
/* Names of the directions.  hb_direction_to_string() indexes this table
 * with the hb_direction_t value, and hb_direction_from_string() matches
 * on the first letter of each entry.  Each entry is three letters plus
 * the terminating NUL. */
const char direction_strings[][4] = {
  "ltr", "rtl", "ttb", "btt"
};
70
71hb_direction_t
72hb_direction_from_string (const char *str, int len)
73{
74  if (unlikely (!str || !len || !*str))
75    return HB_DIRECTION_INVALID;
76
77  /* Lets match loosely: just match the first letter, such that
78   * all of "ltr", "left-to-right", etc work!
79   */
80  char c = TOLOWER (str[0]);
81  for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
82    if (c == direction_strings[i][0])
83      return (hb_direction_t) i;
84
85  return HB_DIRECTION_INVALID;
86}
87
88const char *
89hb_direction_to_string (hb_direction_t direction)
90{
91  if (likely ((unsigned int) direction < ARRAY_LENGTH (direction_strings)))
92    return direction_strings[direction];
93
94  return "invalid";
95}
96
97
98/* hb_language_t */
99
/* Object behind hb_language_t.  Only one byte is declared here;
 * hb_language_item_t points a hb_language_t at a strdup()ed,
 * NUL-terminated string, and hb_language_to_string() returns s as that
 * string. */
struct _hb_language_t {
  const char s[1];
};
103
/* Canonicalization table for language strings, indexed by byte value.
 *
 *   - ASCII letters map to their lowercase form,
 *   - digits map to themselves,
 *   - '-', '@' and '_' all map to '-',
 *   - every other byte (including 0x80..0xFF, which are left to zero
 *     initialization) maps to 0.
 *
 * A 0 entry is what lang_equal() and hb_language_item_t treat as the end
 * of the string. */
static const char canon_map[256] = {
  /* 0x00 */  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
  /* 0x60 */  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
114
115static hb_bool_t
116lang_equal (const void *v1,
117	    const void *v2)
118{
119  const unsigned char *p1 = (const unsigned char *) v1;
120  const unsigned char *p2 = (const unsigned char *) v2;
121
122  while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
123    {
124      p1++, p2++;
125    }
126
127  return (canon_map[*p1] == canon_map[*p2]);
128}
129
#if 0
/* Hash of the canonical (canon_map-mapped) form of key, stopping at the
 * first byte that canon_map sends to 0.  Each step computes
 * h = h * 31 + c, written as a shift and subtract.
 *
 * Compiled out; kept only for reference. */
static unsigned int
lang_hash (const void *key)
{
  /* Explicit cast: C++ has no implicit conversion from const void *. */
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
145
146
147struct hb_language_item_t {
148
149  hb_language_t lang;
150
151  inline bool operator == (const char *s) const {
152    return lang_equal (lang, s);
153  }
154
155  inline hb_language_item_t & operator = (const char *s) {
156    lang = (hb_language_t) strdup (s);
157    for (unsigned char *p = (unsigned char *) lang; *p; p++)
158      *p = canon_map[*p];
159
160    return *this;
161  }
162
163  void finish (void) { free (lang); }
164};
165
166static struct hb_static_lang_set_t : hb_lockable_set_t<hb_language_item_t, hb_static_mutex_t> {
167  ~hb_static_lang_set_t (void) { this->finish (lock); }
168  hb_static_mutex_t lock;
169} langs;
170
171hb_language_t
172hb_language_from_string (const char *str, int len)
173{
174  if (!str || !len || !*str)
175    return HB_LANGUAGE_INVALID;
176
177  char strbuf[32];
178  if (len >= 0) {
179    len = MIN (len, (int) sizeof (strbuf) - 1);
180    str = (char *) memcpy (strbuf, str, len);
181    strbuf[len] = '\0';
182  }
183
184  hb_language_item_t *item = langs.find_or_insert (str, langs.lock);
185
186  return likely (item) ? item->lang : HB_LANGUAGE_INVALID;
187}
188
189const char *
190hb_language_to_string (hb_language_t language)
191{
192  /* This is actually NULL-safe! */
193  return language->s;
194}
195
196hb_language_t
197hb_language_get_default (void)
198{
199  static hb_language_t default_language;
200
201  if (!default_language) {
202    /* This block is not quite threadsafe, but is not as bad as
203     * it looks since it's idempotent.  As long as pointer ops
204     * are atomic, we are safe. */
205
206    /* I hear that setlocale() doesn't honor env vars on Windows,
207     * but for now we ignore that. */
208
209    default_language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1);
210  }
211
212  return default_language;
213}
214
215
216/* hb_script_t */
217
218hb_script_t
219hb_script_from_iso15924_tag (hb_tag_t tag)
220{
221  if (unlikely (tag == HB_TAG_NONE))
222    return HB_SCRIPT_INVALID;
223
224  /* Be lenient, adjust case (one capital letter followed by three small letters) */
225  tag = (tag & 0xDFDFDFDF) | 0x00202020;
226
227  switch (tag) {
228
229    /* These graduated from the 'Q' private-area codes, but
230     * the old code is still aliased by Unicode, and the Qaai
231     * one in use by ICU. */
232    case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
233    case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
234
235    /* Script variants from http://unicode.org/iso15924/ */
236    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
237    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
238    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
239    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
240    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
241    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
242  }
243
244  /* If it looks right, just use the tag as a script */
245  if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
246    return (hb_script_t) tag;
247
248  /* Otherwise, return unknown */
249  return HB_SCRIPT_UNKNOWN;
250}
251
252hb_script_t
253hb_script_from_string (const char *s, int len)
254{
255  return hb_script_from_iso15924_tag (hb_tag_from_string (s, len));
256}
257
258hb_tag_t
259hb_script_to_iso15924_tag (hb_script_t script)
260{
261  return (hb_tag_t) script;
262}
263
264hb_direction_t
265hb_script_get_horizontal_direction (hb_script_t script)
266{
267  switch ((hb_tag_t) script)
268  {
269    case HB_SCRIPT_ARABIC:
270    case HB_SCRIPT_HEBREW:
271    case HB_SCRIPT_SYRIAC:
272    case HB_SCRIPT_THAANA:
273
274    /* Unicode-4.0 additions */
275    case HB_SCRIPT_CYPRIOT:
276
277    /* Unicode-4.1 additions */
278    case HB_SCRIPT_KHAROSHTHI:
279
280    /* Unicode-5.0 additions */
281    case HB_SCRIPT_PHOENICIAN:
282    case HB_SCRIPT_NKO:
283
284    /* Unicode-5.1 additions */
285    case HB_SCRIPT_LYDIAN:
286
287    /* Unicode-5.2 additions */
288    case HB_SCRIPT_AVESTAN:
289    case HB_SCRIPT_IMPERIAL_ARAMAIC:
290    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
291    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
292    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
293    case HB_SCRIPT_OLD_TURKIC:
294    case HB_SCRIPT_SAMARITAN:
295
296    /* Unicode-6.0 additions */
297    case HB_SCRIPT_MANDAIC:
298
299      return HB_DIRECTION_RTL;
300  }
301
302  return HB_DIRECTION_LTR;
303}
304
305
306/* hb_user_data_array_t */
307
308
/* NOTE: Currently we use a global lock to make user_data access
 * thread-safe.  If one day we add a mutex to any object, we should
 * switch to using that instead for these too.
 */
313
314static hb_static_mutex_t user_data_lock;
315
316bool
317hb_user_data_array_t::set (hb_user_data_key_t *key,
318			   void *              data,
319			   hb_destroy_func_t   destroy,
320			   hb_bool_t           replace)
321{
322  if (!key)
323    return false;
324
325  if (replace) {
326    if (!data && !destroy) {
327      items.remove (key, user_data_lock);
328      return true;
329    }
330  }
331  hb_user_data_item_t item = {key, data, destroy};
332  bool ret = !!items.replace_or_insert (item, user_data_lock, replace);
333
334  return ret;
335}
336
337void *
338hb_user_data_array_t::get (hb_user_data_key_t *key)
339{
340  hb_user_data_item_t item = {NULL };
341
342  return items.find (key, &item, user_data_lock) ? item.data : NULL;
343}
344
345void
346hb_user_data_array_t::finish (void)
347{
348  items.finish (user_data_lock);
349}
350
351
352/* hb_version */
353
354void
355hb_version (unsigned int *major,
356	    unsigned int *minor,
357	    unsigned int *micro)
358{
359  *major = HB_VERSION_MAJOR;
360  *minor = HB_VERSION_MINOR;
361  *micro = HB_VERSION_MICRO;
362}
363
364const char *
365hb_version_string (void)
366{
367  return HB_VERSION_STRING;
368}
369
370hb_bool_t
371hb_version_check (unsigned int major,
372		  unsigned int minor,
373		  unsigned int micro)
374{
375  return HB_VERSION_CHECK (major, minor, micro);
376}
377
378
379