hb-common.cc revision d02985ec5a24c659a0a133cc6bc103f1d76bcb29
1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31HB_BEGIN_DECLS
32
33
34/* hb_tag_t */
35
36hb_tag_t
37hb_tag_from_string (const char *s)
38{
39  char tag[4];
40  unsigned int i;
41
42  if (!s || !*s)
43    return HB_TAG_NONE;
44
45  for (i = 0; i < 4 && s[i]; i++)
46    tag[i] = s[i];
47  for (; i < 4; i++)
48    tag[i] = ' ';
49
50  return HB_TAG_CHAR4 (tag);
51}
52
53
54/* hb_direction_t */
55
/* Canonical names of the text directions.
 *
 * hb_direction_from_string() and hb_direction_to_string() use the row
 * index as the hb_direction_t value, so the order of the rows must not
 * change.  Every name is three characters plus the terminating NUL,
 * hence the row size of 4.
 *
 * The table is private to this file, so give it internal linkage
 * explicitly rather than relying on the language default.
 */
static const char direction_strings[][4] = {
  "ltr",
  "rtl",
  "ttb",
  "btt"
};
62
63hb_direction_t
64hb_direction_from_string (const char *str)
65{
66  if (unlikely (!str || !*str))
67    return HB_DIRECTION_INVALID;
68
69  /* Lets match loosely: just match the first letter, such that
70   * all of "ltr", "left-to-right", etc work!
71   */
72  char c = TOLOWER (str[0]);
73  for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++)
74    if (c == direction_strings[i][0])
75      return (hb_direction_t) i;
76
77  return HB_DIRECTION_INVALID;
78}
79
80const char *
81hb_direction_to_string (hb_direction_t direction)
82{
83  if (likely ((unsigned int) direction < ARRAY_LENGTH (direction_strings)))
84    return direction_strings[direction];
85
86  return "invalid";
87}
88
89
90/* hb_language_t */
91
/* Backing type of language handles.
 *
 * Nothing in this file creates an object of this type directly:
 * hb_language_from_string() casts a strdup()'ed string to a handle and
 * reads it back through `s`.  The member is therefore only a window onto
 * the first byte of a NUL-terminated string whose data extends past the
 * declared one-element bound.
 * NOTE(review): hb_language_t is declared elsewhere; presumably it is a
 * pointer to this struct -- confirm against the public header.
 */
struct _hb_language_t
{
  const char s[1];
};
95
/* Canonicalization table for language strings, indexed by unsigned byte.
 *
 *   - ASCII letters map to their lowercase form,
 *   - ASCII digits map to themselves,
 *   - '-', '@' and '_' map to '-',
 *   - everything else maps to 0, which the users of this table treat as
 *     the end of the language string.
 *
 * Only the first 128 entries are spelled out; the upper half is left to
 * zero-initialization.
 */
static const char canon_map[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-',
  /* 0x60 */ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0
};
106
107static hb_bool_t
108lang_equal (const void *v1,
109	    const void *v2)
110{
111  const unsigned char *p1 = (const unsigned char *) v1;
112  const unsigned char *p2 = (const unsigned char *) v2;
113
114  while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
115    {
116      p1++, p2++;
117    }
118
119  return (canon_map[*p1] == canon_map[*p2]);
120}
121
#if 0
/* Hash function matching lang_equal(); currently compiled out.
 *
 * Folds the canonical form (see canon_map) of each byte of the
 * NUL-terminated string key into h as h = h * 31 + c, stopping at the
 * first byte whose canonical form is 0.  Two strings that lang_equal()
 * considers equal therefore hash to the same value.
 */
static unsigned int
lang_hash (const void *key)
{
  /* Explicit cast, as in lang_equal(): this file is built as C++, where
   * a void pointer does not convert implicitly. */
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
137
138
139hb_language_t
140hb_language_from_string (const char *str)
141{
142  static unsigned int num_langs;
143  static unsigned int num_alloced;
144  static hb_language_t *langs;
145  unsigned int i;
146  unsigned char *p;
147
148  /* TODO Use a hash table or something */
149
150  if (!str || !*str)
151    return NULL;
152
153  for (i = 0; i < num_langs; i++)
154    if (lang_equal (str, langs[i]->s))
155      return langs[i];
156
157  if (unlikely (num_langs == num_alloced)) {
158    unsigned int new_alloced = 2 * (8 + num_alloced);
159    hb_language_t *new_langs = (hb_language_t *) realloc (langs, new_alloced * sizeof (langs[0]));
160    if (!new_langs)
161      return NULL;
162    num_alloced = new_alloced;
163    langs = new_langs;
164  }
165
166  langs[i] = (hb_language_t) strdup (str);
167  for (p = (unsigned char *) langs[i]->s; *p; p++)
168    *p = canon_map[*p];
169
170  num_langs++;
171
172  return langs[i];
173}
174
175const char *
176hb_language_to_string (hb_language_t language)
177{
178  return language->s;
179}
180
181
182/* hb_script_t */
183
184hb_script_t
185hb_script_from_iso15924_tag (hb_tag_t tag)
186{
187  if (unlikely (tag == HB_TAG_NONE))
188    return HB_SCRIPT_INVALID;
189
190  /* Be lenient, adjust case (one capital letter followed by three small letters) */
191  tag = (tag & 0xDFDFDFDF) | 0x00202020;
192
193  switch (tag) {
194
195    /* These graduated from the 'Q' private-area codes, but
196     * the old code is still aliased by Unicode, and the Qaai
197     * one in use by ICU. */
198    case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED;
199    case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC;
200
201    /* Script variants from http://unicode.org/iso15924/ */
202    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
203    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
204    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
205    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
206    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
207    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
208  }
209
210  /* If it looks right, just use the tag as a script */
211  if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
212    return (hb_script_t) tag;
213
214  /* Otherwise, return unknown */
215  return HB_SCRIPT_UNKNOWN;
216}
217
218hb_script_t
219hb_script_from_string (const char *s)
220{
221  return hb_script_from_iso15924_tag (hb_tag_from_string (s));
222}
223
224hb_tag_t
225hb_script_to_iso15924_tag (hb_script_t script)
226{
227  return (hb_tag_t) script;
228}
229
230hb_direction_t
231hb_script_get_horizontal_direction (hb_script_t script)
232{
233  switch ((hb_tag_t) script)
234  {
235    case HB_SCRIPT_ARABIC:
236    case HB_SCRIPT_HEBREW:
237    case HB_SCRIPT_SYRIAC:
238    case HB_SCRIPT_THAANA:
239
240    /* Unicode-4.0 additions */
241    case HB_SCRIPT_CYPRIOT:
242
243    /* Unicode-5.0 additions */
244    case HB_SCRIPT_PHOENICIAN:
245    case HB_SCRIPT_NKO:
246
247    /* Unicode-5.2 additions */
248    case HB_SCRIPT_AVESTAN:
249    case HB_SCRIPT_IMPERIAL_ARAMAIC:
250    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
251    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
252    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
253    case HB_SCRIPT_OLD_TURKIC:
254    case HB_SCRIPT_SAMARITAN:
255
256    /* Unicode-6.0 additions */
257    case HB_SCRIPT_MANDAIC:
258
259      return HB_DIRECTION_RTL;
260  }
261
262  return HB_DIRECTION_LTR;
263}
264
265
266HB_END_DECLS
267