hb-common.cc revision 218e67b9eefa26e2e4fe43f99a84d082b185b1b0
1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31#include "hb-mutex-private.hh"
32#include "hb-object-private.hh"
33
34HB_BEGIN_DECLS
35
36
37/* hb_tag_t */
38
39hb_tag_t
40hb_tag_from_string (const char *s)
41{
42  char tag[4];
43  unsigned int i;
44
45  if (!s || !*s)
46    return HB_TAG_NONE;
47
48  for (i = 0; i < 4 && s[i]; i++)
49    tag[i] = s[i];
50  for (; i < 4; i++)
51    tag[i] = ' ';
52
53  return HB_TAG_CHAR4 (tag);
54}
55
56
57/* hb_direction_t */
58
/* Short names of the directions.  The position of a name in this table is
 * used directly as the hb_direction_t value by the conversion functions
 * below, so the order of the entries matters. */
const char direction_strings[][4] = {
  "ltr", "rtl",
  "ttb", "btt"
};
65
66hb_direction_t
67hb_direction_from_string (const char *str)
68{
69  if (unlikely (!str || !*str))
70    return HB_DIRECTION_INVALID;
71
72  /* Lets match loosely: just match the first letter, such that
73   * all of "ltr", "left-to-right", etc work!
74   */
75  char c = TOLOWER (str[0]);
76  for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
77    if (c == direction_strings[i][0])
78      return (hb_direction_t) i;
79
80  return HB_DIRECTION_INVALID;
81}
82
83const char *
84hb_direction_to_string (hb_direction_t direction)
85{
86  if (likely ((unsigned int) direction < ARRAY_LENGTH (direction_strings)))
87    return direction_strings[direction];
88
89  return "invalid";
90}
91
92
93/* hb_language_t */
94
/* A language handle points at the first character of a NUL-terminated,
 * canonicalized language string; the one-element array only gives that
 * string a typed name (see hb_language_to_string, which returns `s'). */
struct _hb_language_t {
  const char s[1];
};
98
/* Canonicalization table for language strings, indexed by unsigned char.
 *
 * Letters map to their lower-case form, digits map to themselves, and
 * '-', '@' and '_' all map to '-'.  Every other byte (including all bytes
 * >= 0x80, which are left to zero-initialization) maps to 0, which the
 * users of this table treat as end-of-string. */
static const char canon_map[256] = {
  /* 0x00 */   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */   0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,  '-',  0,   0,
  /* 0x30 */  '0', '1', '2', '3', '4', '5', '6', '7',  '8', '9',  0,   0,   0,   0,   0,   0,
  /* 0x40 */  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,  '-',
  /* 0x60 */   0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',  'x', 'y', 'z',  0,   0,   0,   0,   0
};
109
110static hb_bool_t
111lang_equal (const void *v1,
112	    const void *v2)
113{
114  const unsigned char *p1 = (const unsigned char *) v1;
115  const unsigned char *p2 = (const unsigned char *) v2;
116
117  while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
118    {
119      p1++, p2++;
120    }
121
122  return (canon_map[*p1] == canon_map[*p2]);
123}
124
#if 0
/* Hash of a language string that is consistent with lang_equal: bytes are
 * mapped through canon_map before being mixed in, and hashing stops at
 * the first byte that canon_map sends to 0.  The mixing step is
 * h = h * 31 + c, written as a shift and subtract.
 *
 * Compiled out: nothing in this file uses it at the moment. */
static unsigned int
lang_hash (const void *key)
{
  /* Explicit cast: C++ does not convert from `const void *' implicitly. */
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
140
141
142struct hb_language_item_t {
143
144  hb_language_t lang;
145
146  inline bool operator == (const char *s) const {
147    return lang_equal (lang, s);
148  }
149
150  inline hb_language_item_t & operator = (const char *s) {
151    lang = (hb_language_t) strdup (s);
152    for (unsigned char *p = (unsigned char *) lang; *p; p++)
153      *p = canon_map[*p];
154
155    return *this;
156  }
157
158  void finish (void) { free (lang); }
159};
160
161static hb_threadsafe_set_t<hb_language_item_t> langs;
162
163hb_language_t
164hb_language_from_string (const char *str)
165{
166  if (!str || !*str)
167    return NULL;
168
169  hb_language_item_t *item = langs.find_or_insert (str);
170
171  return likely (item) ? item->lang : NULL;
172}
173
174const char *
175hb_language_to_string (hb_language_t language)
176{
177  return language->s;
178}
179
180
181/* hb_script_t */
182
183hb_script_t
184hb_script_from_iso15924_tag (hb_tag_t tag)
185{
186  if (unlikely (tag == HB_TAG_NONE))
187    return HB_SCRIPT_INVALID;
188
189  /* Be lenient, adjust case (one capital letter followed by three small letters) */
190  tag = (tag & 0xDFDFDFDF) | 0x00202020;
191
192  switch (tag) {
193
194    /* These graduated from the 'Q' private-area codes, but
195     * the old code is still aliased by Unicode, and the Qaai
196     * one in use by ICU. */
197    case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
198    case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
199
200    /* Script variants from http://unicode.org/iso15924/ */
201    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
202    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
203    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
204    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
205    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
206    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
207  }
208
209  /* If it looks right, just use the tag as a script */
210  if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
211    return (hb_script_t) tag;
212
213  /* Otherwise, return unknown */
214  return HB_SCRIPT_UNKNOWN;
215}
216
217hb_script_t
218hb_script_from_string (const char *s)
219{
220  return hb_script_from_iso15924_tag (hb_tag_from_string (s));
221}
222
223hb_tag_t
224hb_script_to_iso15924_tag (hb_script_t script)
225{
226  return (hb_tag_t) script;
227}
228
229hb_direction_t
230hb_script_get_horizontal_direction (hb_script_t script)
231{
232  switch ((hb_tag_t) script)
233  {
234    case HB_SCRIPT_ARABIC:
235    case HB_SCRIPT_HEBREW:
236    case HB_SCRIPT_SYRIAC:
237    case HB_SCRIPT_THAANA:
238
239    /* Unicode-4.0 additions */
240    case HB_SCRIPT_CYPRIOT:
241
242    /* Unicode-5.0 additions */
243    case HB_SCRIPT_PHOENICIAN:
244    case HB_SCRIPT_NKO:
245
246    /* Unicode-5.2 additions */
247    case HB_SCRIPT_AVESTAN:
248    case HB_SCRIPT_IMPERIAL_ARAMAIC:
249    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
250    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
251    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
252    case HB_SCRIPT_OLD_TURKIC:
253    case HB_SCRIPT_SAMARITAN:
254
255    /* Unicode-6.0 additions */
256    case HB_SCRIPT_MANDAIC:
257
258      return HB_DIRECTION_RTL;
259  }
260
261  return HB_DIRECTION_LTR;
262}
263
264
265/* hb_user_data_array_t */
266
267bool
268hb_user_data_array_t::set (hb_user_data_key_t *key,
269			   void *              data,
270			   hb_destroy_func_t   destroy)
271{
272  if (!key)
273    return false;
274  if (!data && !destroy) {
275    items.remove (key);
276    return true;
277  }
278  hb_user_data_item_t item = {key, data, destroy};
279  return !!items.insert (item);
280}
281
282void *
283hb_user_data_array_t::get (hb_user_data_key_t *key)
284{
285  hb_user_data_item_t *item = items.find (key);
286  return item ? item->data : NULL;
287}
288
289
290HB_END_DECLS
291