hb-common.cc revision d02985ec5a24c659a0a133cc6bc103f1d76bcb29
1/* 2 * Copyright © 2009,2010 Red Hat, Inc. 3 * Copyright © 2011 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29#include "hb-private.hh" 30 31HB_BEGIN_DECLS 32 33 34/* hb_tag_t */ 35 36hb_tag_t 37hb_tag_from_string (const char *s) 38{ 39 char tag[4]; 40 unsigned int i; 41 42 if (!s || !*s) 43 return HB_TAG_NONE; 44 45 for (i = 0; i < 4 && s[i]; i++) 46 tag[i] = s[i]; 47 for (; i < 4; i++) 48 tag[i] = ' '; 49 50 return HB_TAG_CHAR4 (tag); 51} 52 53 54/* hb_direction_t */ 55 56const char direction_strings[][4] = { 57 "ltr", 58 "rtl", 59 "ttb", 60 "btt" 61}; 62 63hb_direction_t 64hb_direction_from_string (const char *str) 65{ 66 if (unlikely (!str || !*str)) 67 return HB_DIRECTION_INVALID; 68 69 /* Lets match loosely: just match the first letter, such that 70 * all of "ltr", "left-to-right", etc work! 71 */ 72 char c = TOLOWER (str[0]); 73 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++) 74 if (c == direction_strings[i][0]) 75 return (hb_direction_t) i; 76 77 return HB_DIRECTION_INVALID; 78} 79 80const char * 81hb_direction_to_string (hb_direction_t direction) 82{ 83 if (likely ((unsigned int) direction < ARRAY_LENGTH (direction_strings))) 84 return direction_strings[direction]; 85 86 return "invalid"; 87} 88 89 90/* hb_language_t */ 91 92struct _hb_language_t { 93 const char s[1]; 94}; 95 96static const char canon_map[256] = { 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, 100 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 101 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 102 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', 103 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 104 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 105}; 106 107static hb_bool_t 108lang_equal (const void *v1, 109 const void *v2) 110{ 111 const unsigned char *p1 = (const unsigned char *) v1; 112 const unsigned char *p2 = (const unsigned char *) v2; 113 114 while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2]) 115 { 116 p1++, p2++; 117 } 118 119 return (canon_map[*p1] == canon_map[*p2]); 120} 121 122#if 0 123static unsigned int 124lang_hash (const void *key) 125{ 126 const unsigned char *p = key; 127 unsigned int h = 0; 128 while (canon_map[*p]) 129 { 130 h = (h << 5) - h + canon_map[*p]; 131 p++; 132 } 133 134 return h; 135} 136#endif 137 138 139hb_language_t 140hb_language_from_string (const char *str) 141{ 142 static unsigned int num_langs; 143 static unsigned int num_alloced; 144 static hb_language_t *langs; 145 unsigned int i; 146 unsigned char *p; 147 148 /* TODO Use a hash table or something */ 149 150 if (!str || !*str) 151 return NULL; 152 153 for (i = 0; i < num_langs; i++) 154 if (lang_equal (str, langs[i]->s)) 155 return langs[i]; 156 157 if (unlikely (num_langs == num_alloced)) { 158 unsigned int new_alloced = 2 * (8 + num_alloced); 159 hb_language_t *new_langs = (hb_language_t *) realloc (langs, new_alloced * sizeof (langs[0])); 160 if (!new_langs) 161 return NULL; 162 num_alloced = new_alloced; 163 langs = new_langs; 164 } 165 166 langs[i] = (hb_language_t) strdup (str); 167 for (p = (unsigned char *) langs[i]->s; *p; p++) 168 *p = canon_map[*p]; 169 170 num_langs++; 171 172 return langs[i]; 173} 174 175const char * 176hb_language_to_string (hb_language_t language) 177{ 178 return language->s; 179} 180 181 182/* hb_script_t */ 183 184hb_script_t 185hb_script_from_iso15924_tag (hb_tag_t tag) 186{ 187 if (unlikely (tag == HB_TAG_NONE)) 188 return HB_SCRIPT_INVALID; 189 190 /* Be lenient, adjust case (one capital letter followed by three small letters) */ 191 tag = (tag & 0xDFDFDFDF) | 0x00202020; 192 193 switch (tag) { 194 195 /* These graduated from the 'Q' private-area codes, but 196 * the old code is still aliased by Unicode, and the Qaai 197 * one in use by ICU. */ 198 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED; 199 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC; 200 201 /* Script variants from http://unicode.org/iso15924/ */ 202 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC; 203 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN; 204 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN; 205 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC; 206 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC; 207 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC; 208 } 209 210 /* If it looks right, just use the tag as a script */ 211 if (((uint32_t) tag & 0xE0E0E0E0) == 0x40606060) 212 return (hb_script_t) tag; 213 214 /* Otherwise, return unknown */ 215 return HB_SCRIPT_UNKNOWN; 216} 217 218hb_script_t 219hb_script_from_string (const char *s) 220{ 221 return hb_script_from_iso15924_tag (hb_tag_from_string (s)); 222} 223 224hb_tag_t 225hb_script_to_iso15924_tag (hb_script_t script) 226{ 227 return (hb_tag_t) script; 228} 229 230hb_direction_t 231hb_script_get_horizontal_direction (hb_script_t script) 232{ 233 switch ((hb_tag_t) script) 234 { 235 case HB_SCRIPT_ARABIC: 236 case HB_SCRIPT_HEBREW: 237 case HB_SCRIPT_SYRIAC: 238 case HB_SCRIPT_THAANA: 239 240 /* Unicode-4.0 additions */ 241 case HB_SCRIPT_CYPRIOT: 242 243 /* Unicode-5.0 additions */ 244 case HB_SCRIPT_PHOENICIAN: 245 case HB_SCRIPT_NKO: 246 247 /* Unicode-5.2 additions */ 248 case HB_SCRIPT_AVESTAN: 249 case HB_SCRIPT_IMPERIAL_ARAMAIC: 250 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI: 251 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN: 252 case HB_SCRIPT_OLD_SOUTH_ARABIAN: 253 case HB_SCRIPT_OLD_TURKIC: 254 case HB_SCRIPT_SAMARITAN: 255 256 /* Unicode-6.0 additions */ 257 case HB_SCRIPT_MANDAIC: 258 259 return HB_DIRECTION_RTL; 260 } 261 262 return HB_DIRECTION_LTR; 263} 264 265 266HB_END_DECLS 267