1/* 2 * Copyright © 2009 Red Hat, Inc. 3 * Copyright © 2011 Google, Inc. 4 * 5 * This is part of HarfBuzz, a text shaping library. 6 * 7 * Permission is hereby granted, without written agreement and without 8 * license or royalty fees, to use, copy, modify, and distribute this 9 * software and its documentation for any purpose, provided that the 10 * above copyright notice and the following two paragraphs appear in 11 * all copies of this software. 12 * 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 17 * DAMAGE. 18 * 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 24 * 25 * Red Hat Author(s): Behdad Esfahbod 26 * Google Author(s): Behdad Esfahbod 27 */ 28 29#include "hb-private.hh" 30 31#include "hb-glib.h" 32 33#include "hb-unicode-private.hh" 34 35 36#if !GLIB_CHECK_VERSION(2,29,14) 37static const hb_script_t 38glib_script_to_script[] = 39{ 40 HB_SCRIPT_COMMON, 41 HB_SCRIPT_INHERITED, 42 HB_SCRIPT_ARABIC, 43 HB_SCRIPT_ARMENIAN, 44 HB_SCRIPT_BENGALI, 45 HB_SCRIPT_BOPOMOFO, 46 HB_SCRIPT_CHEROKEE, 47 HB_SCRIPT_COPTIC, 48 HB_SCRIPT_CYRILLIC, 49 HB_SCRIPT_DESERET, 50 HB_SCRIPT_DEVANAGARI, 51 HB_SCRIPT_ETHIOPIC, 52 HB_SCRIPT_GEORGIAN, 53 HB_SCRIPT_GOTHIC, 54 HB_SCRIPT_GREEK, 55 HB_SCRIPT_GUJARATI, 56 HB_SCRIPT_GURMUKHI, 57 HB_SCRIPT_HAN, 58 HB_SCRIPT_HANGUL, 59 HB_SCRIPT_HEBREW, 60 HB_SCRIPT_HIRAGANA, 61 HB_SCRIPT_KANNADA, 62 HB_SCRIPT_KATAKANA, 63 HB_SCRIPT_KHMER, 64 HB_SCRIPT_LAO, 65 HB_SCRIPT_LATIN, 66 HB_SCRIPT_MALAYALAM, 67 HB_SCRIPT_MONGOLIAN, 68 HB_SCRIPT_MYANMAR, 69 HB_SCRIPT_OGHAM, 70 HB_SCRIPT_OLD_ITALIC, 71 HB_SCRIPT_ORIYA, 72 HB_SCRIPT_RUNIC, 73 HB_SCRIPT_SINHALA, 74 HB_SCRIPT_SYRIAC, 75 HB_SCRIPT_TAMIL, 76 HB_SCRIPT_TELUGU, 77 HB_SCRIPT_THAANA, 78 HB_SCRIPT_THAI, 79 HB_SCRIPT_TIBETAN, 80 HB_SCRIPT_CANADIAN_SYLLABICS, 81 HB_SCRIPT_YI, 82 HB_SCRIPT_TAGALOG, 83 HB_SCRIPT_HANUNOO, 84 HB_SCRIPT_BUHID, 85 HB_SCRIPT_TAGBANWA, 86 87 /* Unicode-4.0 additions */ 88 HB_SCRIPT_BRAILLE, 89 HB_SCRIPT_CYPRIOT, 90 HB_SCRIPT_LIMBU, 91 HB_SCRIPT_OSMANYA, 92 HB_SCRIPT_SHAVIAN, 93 HB_SCRIPT_LINEAR_B, 94 HB_SCRIPT_TAI_LE, 95 HB_SCRIPT_UGARITIC, 96 97 /* Unicode-4.1 additions */ 98 HB_SCRIPT_NEW_TAI_LUE, 99 HB_SCRIPT_BUGINESE, 100 HB_SCRIPT_GLAGOLITIC, 101 HB_SCRIPT_TIFINAGH, 102 HB_SCRIPT_SYLOTI_NAGRI, 103 HB_SCRIPT_OLD_PERSIAN, 104 HB_SCRIPT_KHAROSHTHI, 105 106 /* Unicode-5.0 additions */ 107 HB_SCRIPT_UNKNOWN, 108 HB_SCRIPT_BALINESE, 109 HB_SCRIPT_CUNEIFORM, 110 HB_SCRIPT_PHOENICIAN, 111 HB_SCRIPT_PHAGS_PA, 112 HB_SCRIPT_NKO, 113 114 /* Unicode-5.1 additions */ 115 HB_SCRIPT_KAYAH_LI, 116 HB_SCRIPT_LEPCHA, 117 HB_SCRIPT_REJANG, 118 HB_SCRIPT_SUNDANESE, 119 HB_SCRIPT_SAURASHTRA, 120 HB_SCRIPT_CHAM, 121 HB_SCRIPT_OL_CHIKI, 122 HB_SCRIPT_VAI, 123 HB_SCRIPT_CARIAN, 124 HB_SCRIPT_LYCIAN, 125 HB_SCRIPT_LYDIAN, 126 127 /* Unicode-5.2 additions */ 128 HB_SCRIPT_AVESTAN, 129 HB_SCRIPT_BAMUM, 130 HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, 131 HB_SCRIPT_IMPERIAL_ARAMAIC, 132 HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, 133 HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, 134 HB_SCRIPT_JAVANESE, 135 HB_SCRIPT_KAITHI, 136 HB_SCRIPT_TAI_THAM, 137 HB_SCRIPT_LISU, 138 HB_SCRIPT_MEETEI_MAYEK, 139 HB_SCRIPT_OLD_SOUTH_ARABIAN, 140 HB_SCRIPT_OLD_TURKIC, 141 HB_SCRIPT_SAMARITAN, 142 HB_SCRIPT_TAI_VIET, 143 144 /* Unicode-6.0 additions */ 145 HB_SCRIPT_BATAK, 146 HB_SCRIPT_BRAHMI, 147 HB_SCRIPT_MANDAIC, 148 149 /* Unicode-6.1 additions */ 150 HB_SCRIPT_CHAKMA, 151 HB_SCRIPT_MEROITIC_CURSIVE, 152 HB_SCRIPT_MEROITIC_HIEROGLYPHS, 153 HB_SCRIPT_MIAO, 154 HB_SCRIPT_SHARADA, 155 HB_SCRIPT_SORA_SOMPENG, 156 HB_SCRIPT_TAKRI 157}; 158#endif 159 160hb_script_t 161hb_glib_script_to_script (GUnicodeScript script) 162{ 163#if GLIB_CHECK_VERSION(2,29,14) 164 return (hb_script_t) g_unicode_script_to_iso15924 (script); 165#else 166 if (likely ((unsigned int) script < ARRAY_LENGTH (glib_script_to_script))) 167 return glib_script_to_script[script]; 168 169 if (unlikely (script == G_UNICODE_SCRIPT_INVALID_CODE)) 170 return HB_SCRIPT_INVALID; 171 172 return HB_SCRIPT_UNKNOWN; 173#endif 174} 175 176GUnicodeScript 177hb_glib_script_from_script (hb_script_t script) 178{ 179#if GLIB_CHECK_VERSION(2,29,14) 180 return g_unicode_script_from_iso15924 (script); 181#else 182 unsigned int count = ARRAY_LENGTH (glib_script_to_script); 183 for (unsigned int i = 0; i < count; i++) 184 if (glib_script_to_script[i] == script) 185 return (GUnicodeScript) i; 186 187 if (unlikely (script == HB_SCRIPT_INVALID)) 188 return G_UNICODE_SCRIPT_INVALID_CODE; 189 190 return G_UNICODE_SCRIPT_UNKNOWN; 191#endif 192} 193 194 195static hb_unicode_combining_class_t 196hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, 197 hb_codepoint_t unicode, 198 void *user_data HB_UNUSED) 199 200{ 201 return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode); 202} 203 204static unsigned int 205hb_glib_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs HB_UNUSED, 206 hb_codepoint_t unicode, 207 void *user_data HB_UNUSED) 208{ 209 return g_unichar_iswide (unicode) ? 2 : 1; 210} 211 212static hb_unicode_general_category_t 213hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, 214 hb_codepoint_t unicode, 215 void *user_data HB_UNUSED) 216 217{ 218 /* hb_unicode_general_category_t and GUnicodeType are identical */ 219 return (hb_unicode_general_category_t) g_unichar_type (unicode); 220} 221 222static hb_codepoint_t 223hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, 224 hb_codepoint_t unicode, 225 void *user_data HB_UNUSED) 226{ 227 g_unichar_get_mirror_char (unicode, &unicode); 228 return unicode; 229} 230 231static hb_script_t 232hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, 233 hb_codepoint_t unicode, 234 void *user_data HB_UNUSED) 235{ 236 return hb_glib_script_to_script (g_unichar_get_script (unicode)); 237} 238 239static hb_bool_t 240hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 241 hb_codepoint_t a, 242 hb_codepoint_t b, 243 hb_codepoint_t *ab, 244 void *user_data HB_UNUSED) 245{ 246#if GLIB_CHECK_VERSION(2,29,12) 247 return g_unichar_compose (a, b, ab); 248#endif 249 250 /* We don't ifdef-out the fallback code such that compiler always 251 * sees it and makes sure it's compilable. */ 252 253 gchar utf8[12]; 254 gchar *normalized; 255 int len; 256 hb_bool_t ret; 257 258 len = g_unichar_to_utf8 (a, utf8); 259 len += g_unichar_to_utf8 (b, utf8 + len); 260 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC); 261 len = g_utf8_strlen (normalized, -1); 262 if (unlikely (!len)) 263 return false; 264 265 if (len == 1) { 266 *ab = g_utf8_get_char (normalized); 267 ret = true; 268 } else { 269 ret = false; 270 } 271 272 g_free (normalized); 273 return ret; 274} 275 276static hb_bool_t 277hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, 278 hb_codepoint_t ab, 279 hb_codepoint_t *a, 280 hb_codepoint_t *b, 281 void *user_data HB_UNUSED) 282{ 283#if GLIB_CHECK_VERSION(2,29,12) 284 return g_unichar_decompose (ab, a, b); 285#endif 286 287 /* We don't ifdef-out the fallback code such that compiler always 288 * sees it and makes sure it's compilable. */ 289 290 gchar utf8[6]; 291 gchar *normalized; 292 int len; 293 hb_bool_t ret; 294 295 len = g_unichar_to_utf8 (ab, utf8); 296 normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD); 297 len = g_utf8_strlen (normalized, -1); 298 if (unlikely (!len)) 299 return false; 300 301 if (len == 1) { 302 *a = g_utf8_get_char (normalized); 303 *b = 0; 304 ret = *a != ab; 305 } else if (len == 2) { 306 *a = g_utf8_get_char (normalized); 307 *b = g_utf8_get_char (g_utf8_next_char (normalized)); 308 /* Here's the ugly part: if ab decomposes to a single character and 309 * that character decomposes again, we have to detect that and undo 310 * the second part :-(. */ 311 gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); 312 hb_codepoint_t c = g_utf8_get_char (recomposed); 313 if (c != ab && c != *a) { 314 *a = c; 315 *b = 0; 316 } 317 g_free (recomposed); 318 ret = true; 319 } else { 320 /* If decomposed to more than two characters, take the last one, 321 * and recompose the rest to get the first component. */ 322 gchar *end = g_utf8_offset_to_pointer (normalized, len - 1); 323 gchar *recomposed; 324 *b = g_utf8_get_char (end); 325 recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC); 326 /* We expect that recomposed has exactly one character now. */ 327 *a = g_utf8_get_char (recomposed); 328 g_free (recomposed); 329 ret = true; 330 } 331 332 g_free (normalized); 333 return ret; 334} 335 336static unsigned int 337hb_glib_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs HB_UNUSED, 338 hb_codepoint_t u, 339 hb_codepoint_t *decomposed, 340 void *user_data HB_UNUSED) 341{ 342#if GLIB_CHECK_VERSION(2,29,12) 343 return g_unichar_fully_decompose (u, true, decomposed, HB_UNICODE_MAX_DECOMPOSITION_LEN); 344#endif 345 346 /* If the user doesn't have GLib >= 2.29.12 we have to perform 347 * a round trip to UTF-8 and the associated memory management dance. */ 348 gchar utf8[6]; 349 gchar *utf8_decomposed, *c; 350 gsize utf8_len, utf8_decomposed_len, i; 351 352 /* Convert @u to UTF-8 and normalise it in NFKD mode. This performs the compatibility decomposition. */ 353 utf8_len = g_unichar_to_utf8 (u, utf8); 354 utf8_decomposed = g_utf8_normalize (utf8, utf8_len, G_NORMALIZE_NFKD); 355 utf8_decomposed_len = g_utf8_strlen (utf8_decomposed, -1); 356 357 assert (utf8_decomposed_len <= HB_UNICODE_MAX_DECOMPOSITION_LEN); 358 359 for (i = 0, c = utf8_decomposed; i < utf8_decomposed_len; i++, c = g_utf8_next_char (c)) 360 *decomposed++ = g_utf8_get_char (c); 361 362 g_free (utf8_decomposed); 363 364 return utf8_decomposed_len; 365} 366 367hb_unicode_funcs_t * 368hb_glib_get_unicode_funcs (void) 369{ 370 static const hb_unicode_funcs_t _hb_glib_unicode_funcs = { 371 HB_OBJECT_HEADER_STATIC, 372 373 NULL, /* parent */ 374 true, /* immutable */ 375 { 376#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_glib_unicode_##name, 377 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS 378#undef HB_UNICODE_FUNC_IMPLEMENT 379 } 380 }; 381 382 return const_cast<hb_unicode_funcs_t *> (&_hb_glib_unicode_funcs); 383} 384 385hb_blob_t * 386hb_glib_blob_create (GBytes *gbytes) 387{ 388 gsize size = 0; 389 gconstpointer data = g_bytes_get_data (gbytes, &size); 390 return hb_blob_create ((const char *) data, 391 size, 392 HB_MEMORY_MODE_READONLY, 393 g_bytes_ref (gbytes), 394 (hb_destroy_func_t) g_bytes_unref); 395} 396