hb-common.cc revision 675956aca01fc4e005a338af43d1c1f4f938abd1
1/* 2 * Copyright © 2009,2010 Red Hat, Inc. 3 * Copyright © 2011,2012 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29#include "hb-private.hh" 30 31#include "hb-mutex-private.hh" 32#include "hb-object-private.hh" 33 34#include <locale.h> 35 36 37/* hb_options_t */ 38 39hb_options_union_t _hb_options; 40 41void 42_hb_options_init (void) 43{ 44 hb_options_union_t u; 45 u.i = 0; 46 u.opts.initialized = 1; 47 48 char *c = getenv ("HB_OPTIONS"); 49 u.opts.uniscribe_bug_compatible = c && strstr (c, "uniscribe-bug-compatible"); 50 51 /* This is idempotent and threadsafe. */ 52 _hb_options = u; 53} 54 55 56/* hb_tag_t */ 57 58/** 59 * hb_tag_from_string: 60 * @str: (array length=len): 61 * @len: 62 * 63 * 64 * 65 * Return value: 66 * 67 * Since: 1.0 68 **/ 69hb_tag_t 70hb_tag_from_string (const char *str, int len) 71{ 72 char tag[4]; 73 unsigned int i; 74 75 if (!str || !len || !*str) 76 return HB_TAG_NONE; 77 78 if (len < 0 || len > 4) 79 len = 4; 80 for (i = 0; i < (unsigned) len && str[i]; i++) 81 tag[i] = str[i]; 82 for (; i < 4; i++) 83 tag[i] = ' '; 84 85 return HB_TAG_CHAR4 (tag); 86} 87 88/** 89 * hb_tag_to_string: 90 * @tag: 91 * @buf: (array fixed-size=4): 92 * 93 * 94 * 95 * Since: 1.0 96 **/ 97void 98hb_tag_to_string (hb_tag_t tag, char *buf) 99{ 100 buf[0] = (char) (uint8_t) (tag >> 24); 101 buf[1] = (char) (uint8_t) (tag >> 16); 102 buf[2] = (char) (uint8_t) (tag >> 8); 103 buf[3] = (char) (uint8_t) (tag >> 0); 104} 105 106 107/* hb_direction_t */ 108 109const char direction_strings[][4] = { 110 "ltr", 111 "rtl", 112 "ttb", 113 "btt" 114}; 115 116/** 117 * hb_direction_from_string: 118 * @str: (array length=len): 119 * @len: 120 * 121 * 122 * 123 * Return value: 124 * 125 * Since: 1.0 126 **/ 127hb_direction_t 128hb_direction_from_string (const char *str, int len) 129{ 130 if (unlikely (!str || !len || !*str)) 131 return HB_DIRECTION_INVALID; 132 133 /* Lets match loosely: just match the first letter, such that 134 * all of "ltr", "left-to-right", etc work! 135 */ 136 char c = TOLOWER (str[0]); 137 for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++) 138 if (c == direction_strings[i][0]) 139 return (hb_direction_t) (HB_DIRECTION_LTR + i); 140 141 return HB_DIRECTION_INVALID; 142} 143 144/** 145 * hb_direction_to_string: 146 * @direction: 147 * 148 * 149 * 150 * Return value: (transfer none): 151 * 152 * Since: 1.0 153 **/ 154const char * 155hb_direction_to_string (hb_direction_t direction) 156{ 157 if (likely ((unsigned int) (direction - HB_DIRECTION_LTR) 158 < ARRAY_LENGTH (direction_strings))) 159 return direction_strings[direction - HB_DIRECTION_LTR]; 160 161 return "invalid"; 162} 163 164 165/* hb_language_t */ 166 167struct hb_language_impl_t { 168 const char s[1]; 169}; 170 171static const char canon_map[256] = { 172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, 175 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, 176 '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 177 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', 178 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 179 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 180}; 181 182static hb_bool_t 183lang_equal (hb_language_t v1, 184 const void *v2) 185{ 186 const unsigned char *p1 = (const unsigned char *) v1; 187 const unsigned char *p2 = (const unsigned char *) v2; 188 189 while (*p1 && *p1 == canon_map[*p2]) 190 p1++, p2++; 191 192 return *p1 == canon_map[*p2]; 193} 194 195#if 0 196static unsigned int 197lang_hash (const void *key) 198{ 199 const unsigned char *p = key; 200 unsigned int h = 0; 201 while (canon_map[*p]) 202 { 203 h = (h << 5) - h + canon_map[*p]; 204 p++; 205 } 206 207 return h; 208} 209#endif 210 211 212struct hb_language_item_t { 213 214 struct hb_language_item_t *next; 215 hb_language_t lang; 216 217 inline bool operator == (const char *s) const { 218 return lang_equal (lang, s); 219 } 220 221 inline hb_language_item_t & operator = (const char *s) { 222 lang = (hb_language_t) strdup (s); 223 for (unsigned char *p = (unsigned char *) lang; *p; p++) 224 *p = canon_map[*p]; 225 226 return *this; 227 } 228 229 void finish (void) { free ((void *) lang); } 230}; 231 232 233/* Thread-safe lock-free language list */ 234 235static hb_language_item_t *langs; 236 237#ifdef HB_USE_ATEXIT 238static 239void free_langs (void) 240{ 241 while (langs) { 242 hb_language_item_t *next = langs->next; 243 langs->finish (); 244 free (langs); 245 langs = next; 246 } 247} 248#endif 249 250static hb_language_item_t * 251lang_find_or_insert (const char *key) 252{ 253retry: 254 hb_language_item_t *first_lang = (hb_language_item_t *) hb_atomic_ptr_get (&langs); 255 256 for (hb_language_item_t *lang = first_lang; lang; lang = lang->next) 257 if (*lang == key) 258 return lang; 259 260 /* Not found; allocate one. */ 261 hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t)); 262 if (unlikely (!lang)) 263 return NULL; 264 lang->next = first_lang; 265 *lang = key; 266 267 if (!hb_atomic_ptr_cmpexch (&langs, first_lang, lang)) { 268 lang->finish (); 269 free (lang); 270 goto retry; 271 } 272 273#ifdef HB_USE_ATEXIT 274 if (!first_lang) 275 atexit (free_langs); /* First person registers atexit() callback. */ 276#endif 277 278 return lang; 279} 280 281 282/** 283 * hb_language_from_string: 284 * @str: (array length=len): 285 * @len: 286 * 287 * 288 * 289 * Return value: 290 * 291 * Since: 1.0 292 **/ 293hb_language_t 294hb_language_from_string (const char *str, int len) 295{ 296 char strbuf[64]; 297 298 if (!str || !len || !*str) 299 return HB_LANGUAGE_INVALID; 300 301 if (len >= 0) 302 { 303 /* NUL-terminate it. */ 304 len = MIN (len, (int) sizeof (strbuf) - 1); 305 memcpy (strbuf, str, len); 306 strbuf[len] = '\0'; 307 str = strbuf; 308 } 309 310 hb_language_item_t *item = lang_find_or_insert (str); 311 312 return likely (item) ? item->lang : HB_LANGUAGE_INVALID; 313} 314 315/** 316 * hb_language_to_string: 317 * @language: 318 * 319 * 320 * 321 * Return value: (transfer none): 322 * 323 * Since: 1.0 324 **/ 325const char * 326hb_language_to_string (hb_language_t language) 327{ 328 /* This is actually NULL-safe! */ 329 return language->s; 330} 331 332/** 333 * hb_language_get_default: 334 * 335 * 336 * 337 * Return value: 338 * 339 * Since: 1.0 340 **/ 341hb_language_t 342hb_language_get_default (void) 343{ 344 static hb_language_t default_language = HB_LANGUAGE_INVALID; 345 346 hb_language_t language = (hb_language_t) hb_atomic_ptr_get (&default_language); 347 if (unlikely (language == HB_LANGUAGE_INVALID)) { 348 language = hb_language_from_string (setlocale (LC_CTYPE, NULL), -1); 349 (void) hb_atomic_ptr_cmpexch (&default_language, HB_LANGUAGE_INVALID, language); 350 } 351 352 return default_language; 353} 354 355 356/* hb_script_t */ 357 358/** 359 * hb_script_from_iso15924_tag: 360 * @tag: 361 * 362 * 363 * 364 * Return value: 365 * 366 * Since: 1.0 367 **/ 368hb_script_t 369hb_script_from_iso15924_tag (hb_tag_t tag) 370{ 371 if (unlikely (tag == HB_TAG_NONE)) 372 return HB_SCRIPT_INVALID; 373 374 /* Be lenient, adjust case (one capital letter followed by three small letters) */ 375 tag = (tag & 0xDFDFDFDFu) | 0x00202020u; 376 377 switch (tag) { 378 379 /* These graduated from the 'Q' private-area codes, but 380 * the old code is still aliased by Unicode, and the Qaai 381 * one in use by ICU. */ 382 case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED; 383 case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC; 384 385 /* Script variants from http://unicode.org/iso15924/ */ 386 case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC; 387 case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN; 388 case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN; 389 case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC; 390 case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC; 391 case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC; 392 } 393 394 /* If it looks right, just use the tag as a script */ 395 if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u) 396 return (hb_script_t) tag; 397 398 /* Otherwise, return unknown */ 399 return HB_SCRIPT_UNKNOWN; 400} 401 402/** 403 * hb_script_from_string: 404 * @s: (array length=len): 405 * @len: 406 * 407 * 408 * 409 * Return value: 410 * 411 * Since: 1.0 412 **/ 413hb_script_t 414hb_script_from_string (const char *s, int len) 415{ 416 return hb_script_from_iso15924_tag (hb_tag_from_string (s, len)); 417} 418 419/** 420 * hb_script_to_iso15924_tag: 421 * @script: 422 * 423 * 424 * 425 * Return value: 426 * 427 * Since: 1.0 428 **/ 429hb_tag_t 430hb_script_to_iso15924_tag (hb_script_t script) 431{ 432 return (hb_tag_t) script; 433} 434 435/** 436 * hb_script_get_horizontal_direction: 437 * @script: 438 * 439 * 440 * 441 * Return value: 442 * 443 * Since: 1.0 444 **/ 445hb_direction_t 446hb_script_get_horizontal_direction (hb_script_t script) 447{ 448 /* http://goo.gl/x9ilM */ 449 switch ((hb_tag_t) script) 450 { 451 /* Unicode-1.1 additions */ 452 case HB_SCRIPT_ARABIC: 453 case HB_SCRIPT_HEBREW: 454 455 /* Unicode-3.0 additions */ 456 case HB_SCRIPT_SYRIAC: 457 case HB_SCRIPT_THAANA: 458 459 /* Unicode-4.0 additions */ 460 case HB_SCRIPT_CYPRIOT: 461 462 /* Unicode-4.1 additions */ 463 case HB_SCRIPT_KHAROSHTHI: 464 465 /* Unicode-5.0 additions */ 466 case HB_SCRIPT_PHOENICIAN: 467 case HB_SCRIPT_NKO: 468 469 /* Unicode-5.1 additions */ 470 case HB_SCRIPT_LYDIAN: 471 472 /* Unicode-5.2 additions */ 473 case HB_SCRIPT_AVESTAN: 474 case HB_SCRIPT_IMPERIAL_ARAMAIC: 475 case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI: 476 case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN: 477 case HB_SCRIPT_OLD_SOUTH_ARABIAN: 478 case HB_SCRIPT_OLD_TURKIC: 479 case HB_SCRIPT_SAMARITAN: 480 481 /* Unicode-6.0 additions */ 482 case HB_SCRIPT_MANDAIC: 483 484 /* Unicode-6.1 additions */ 485 case HB_SCRIPT_MEROITIC_CURSIVE: 486 case HB_SCRIPT_MEROITIC_HIEROGLYPHS: 487 488 /* Unicode-7.0 additions */ 489 case HB_SCRIPT_MANICHAEAN: 490 case HB_SCRIPT_MENDE_KIKAKUI: 491 case HB_SCRIPT_NABATAEAN: 492 case HB_SCRIPT_OLD_NORTH_ARABIAN: 493 case HB_SCRIPT_PALMYRENE: 494 case HB_SCRIPT_PSALTER_PAHLAVI: 495 496 return HB_DIRECTION_RTL; 497 } 498 499 return HB_DIRECTION_LTR; 500} 501 502 503/* hb_user_data_array_t */ 504 505bool 506hb_user_data_array_t::set (hb_user_data_key_t *key, 507 void * data, 508 hb_destroy_func_t destroy, 509 hb_bool_t replace) 510{ 511 if (!key) 512 return false; 513 514 if (replace) { 515 if (!data && !destroy) { 516 items.remove (key, lock); 517 return true; 518 } 519 } 520 hb_user_data_item_t item = {key, data, destroy}; 521 bool ret = !!items.replace_or_insert (item, lock, replace); 522 523 return ret; 524} 525 526void * 527hb_user_data_array_t::get (hb_user_data_key_t *key) 528{ 529 hb_user_data_item_t item = {NULL }; 530 531 return items.find (key, &item, lock) ? item.data : NULL; 532} 533 534 535/* hb_version */ 536 537/** 538 * hb_version: 539 * @major: (out): Library major version component. 540 * @minor: (out): Library minor version component. 541 * @micro: (out): Library micro version component. 542 * 543 * Returns library version as three integer components. 544 * 545 * Since: 1.0 546 **/ 547void 548hb_version (unsigned int *major, 549 unsigned int *minor, 550 unsigned int *micro) 551{ 552 *major = HB_VERSION_MAJOR; 553 *minor = HB_VERSION_MINOR; 554 *micro = HB_VERSION_MICRO; 555} 556 557/** 558 * hb_version_string: 559 * 560 * Returns library version as a string with three components. 561 * 562 * Return value: library version string. 563 * 564 * Since: 1.0 565 **/ 566const char * 567hb_version_string (void) 568{ 569 return HB_VERSION_STRING; 570} 571 572/** 573 * hb_version_atleast: 574 * @major: 575 * @minor: 576 * @micro: 577 * 578 * 579 * 580 * Return value: 581 * 582 * Since: 1.0 583 **/ 584hb_bool_t 585hb_version_atleast (unsigned int major, 586 unsigned int minor, 587 unsigned int micro) 588{ 589 return HB_VERSION_ATLEAST (major, minor, micro); 590} 591