hb-common.cc revision 2409d5f8d7dd8b535ce5ea29e933f7db27d33793
1/*
2 * Copyright © 2009,2010  Red Hat, Inc.
3 * Copyright © 2011  Google, Inc.
4 *
5 *  This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#include "hb-private.hh"
30
31HB_BEGIN_DECLS
32
33
34/* hb_tag_t */
35
36hb_tag_t
37hb_tag_from_string (const char *s)
38{
39  char tag[4];
40  unsigned int i;
41
42  if (!s || !*s)
43    return HB_TAG_NONE;
44
45  for (i = 0; i < 4 && s[i]; i++)
46    tag[i] = s[i];
47  for (; i < 4; i++)
48    tag[i] = ' ';
49
50  return HB_TAG_CHAR4 (tag);
51}
52
53
54/* hb_language_t */
55
/*
 * Backing type for hb_language_t handles.
 *
 * hb_language_from_string() casts a strdup()'d string to a pointer to
 * this struct, so `s` is really the first byte of a NUL-terminated
 * string that extends past the declared one-element array.
 */
struct _hb_language_t
{
  const char s[1];
};
59
/*
 * Canonicalization table for language-string bytes, indexed by the byte
 * value as an unsigned char:
 *
 *   - '0'..'9' map to themselves;
 *   - 'A'..'Z' and 'a'..'z' map to the lowercase letter;
 *   - '-', '@' and '_' all map to '-';
 *   - every other byte maps to 0.
 *
 * Only the first 128 entries are spelled out; the remaining 128 are
 * implicitly zero-initialized.
 */
static const char canon_map[256] = {
  /* 0x00 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x08 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x18 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */  0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x28 */  0,   0,   0,   0,   0,  '-',  0,   0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
  /* 0x38 */ '8', '9',  0,   0,   0,   0,   0,   0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x48 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x58 */ 'x', 'y', 'z',  0,   0,   0,   0,  '-',
  /* 0x60 */  0,  'a', 'b', 'c', 'd', 'e', 'f', 'g',
  /* 0x68 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  /* 0x78 */ 'x', 'y', 'z',  0,   0,   0,   0,   0
};
70
71static hb_bool_t
72lang_equal (const void *v1,
73	    const void *v2)
74{
75  const unsigned char *p1 = (const unsigned char *) v1;
76  const unsigned char *p2 = (const unsigned char *) v2;
77
78  while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
79    {
80      p1++, p2++;
81    }
82
83  return (canon_map[*p1] == canon_map[*p2]);
84}
85
#if 0
/*
 * Hash a language string over its canonical form.
 *
 * Each byte is mapped through canon_map and folded in as h = h * 31 + c
 * (written as (h << 5) - h + c), stopping at the first byte whose
 * canonical value is 0.
 *
 * Currently compiled out.  The explicit cast below is required for this
 * to build as C++ should it ever be re-enabled.
 */
static unsigned int
lang_hash (const void *key)
{
  const unsigned char *p = (const unsigned char *) key;
  unsigned int h = 0;
  while (canon_map[*p])
    {
      h = (h << 5) - h + canon_map[*p];
      p++;
    }

  return h;
}
#endif
101
102
103hb_language_t
104hb_language_from_string (const char *str)
105{
106  static unsigned int num_langs;
107  static unsigned int num_alloced;
108  static hb_language_t *langs;
109  unsigned int i;
110  unsigned char *p;
111
112  /* TODO Use a hash table or something */
113
114  if (!str || !*str)
115    return NULL;
116
117  for (i = 0; i < num_langs; i++)
118    if (lang_equal (str, langs[i]->s))
119      return langs[i];
120
121  if (unlikely (num_langs == num_alloced)) {
122    unsigned int new_alloced = 2 * (8 + num_alloced);
123    hb_language_t *new_langs = (hb_language_t *) realloc (langs, new_alloced * sizeof (langs[0]));
124    if (!new_langs)
125      return NULL;
126    num_alloced = new_alloced;
127    langs = new_langs;
128  }
129
130  langs[i] = (hb_language_t) strdup (str);
131  for (p = (unsigned char *) langs[i]->s; *p; p++)
132    *p = canon_map[*p];
133
134  num_langs++;
135
136  return langs[i];
137}
138
139const char *
140hb_language_to_string (hb_language_t language)
141{
142  return language->s;
143}
144
145
146/* hb_script_t */
147
148hb_script_t
149hb_script_from_iso15924_tag (hb_tag_t tag)
150{
151  if (unlikely (tag == HB_TAG_NONE))
152    return HB_SCRIPT_INVALID;
153
154  /* Be lenient, adjust case (one capital letter followed by three small letters) */
155  tag = (tag & 0xDFDFDFDF) | 0x00202020;
156
157  switch (tag) {
158    case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC;
159    case HB_TAG('G','e','o','a'): return HB_SCRIPT_GEORGIAN;
160    case HB_TAG('G','e','o','n'): return HB_SCRIPT_GEORGIAN;
161    case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN;
162    case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN;
163    case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC;
164    case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC;
165    case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC;
166  }
167
168  /* If it looks right, just use the tag as a script */
169  if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060)
170    return (hb_script_t) tag;
171
172  /* Otherwise, return unknown */
173  return HB_SCRIPT_UNKNOWN;
174}
175
176hb_script_t
177hb_script_from_string (const char *s)
178{
179  return hb_script_from_iso15924_tag (hb_tag_from_string (s));
180}
181
182hb_tag_t
183hb_script_to_iso15924_tag (hb_script_t script)
184{
185  return (hb_tag_t) script;
186}
187
188hb_direction_t
189hb_script_get_horizontal_direction (hb_script_t script)
190{
191  switch ((hb_tag_t) script)
192  {
193    case HB_SCRIPT_ARABIC:
194    case HB_SCRIPT_HEBREW:
195    case HB_SCRIPT_SYRIAC:
196    case HB_SCRIPT_THAANA:
197
198    /* Unicode-4.0 additions */
199    case HB_SCRIPT_CYPRIOT:
200
201    /* Unicode-5.0 additions */
202    case HB_SCRIPT_PHOENICIAN:
203    case HB_SCRIPT_NKO:
204
205    /* Unicode-5.2 additions */
206    case HB_SCRIPT_AVESTAN:
207    case HB_SCRIPT_IMPERIAL_ARAMAIC:
208    case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI:
209    case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN:
210    case HB_SCRIPT_OLD_SOUTH_ARABIAN:
211    case HB_SCRIPT_OLD_TURKIC:
212    case HB_SCRIPT_SAMARITAN:
213
214    /* Unicode-6.0 additions */
215    case HB_SCRIPT_MANDAIC:
216
217      return HB_DIRECTION_RTL;
218  }
219
220  return HB_DIRECTION_LTR;
221}
222
223
224HB_END_DECLS
225