/* hb-ot-tag.cc revision 62879eebd9965179af8602ba29ac0a64a739b757 */
1/* 2 * Copyright (C) 2009 Red Hat, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
23 * 24 * Red Hat Author(s): Behdad Esfahbod 25 */ 26 27#include "hb-private.h" 28#include "hb-ot.h" 29 30#include <string.h> 31 32HB_BEGIN_DECLS 33 34 35/* hb_script_t */ 36 37static hb_tag_t 38hb_ot_old_tag_from_script (hb_script_t script) 39{ 40 switch ((hb_tag_t) script) { 41 case HB_SCRIPT_COPTIC: return HB_TAG('c','o','p','t'); 42 case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); 43 case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' '); 44 case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' '); 45 /* Unicode-5.0 additions */ 46 case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' '); 47 /* Unicode-5.1 additions */ 48 case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' '); 49 /* Unicode-5.2 additions */ 50 case HB_SCRIPT_MEETEI_MAYEK: return HB_TAG('m','y','e','i'); 51 /* Unicode-6.0 additions */ 52 } 53 54 /* Else, just change first char to lowercase and return */ 55 return ((hb_tag_t) script) | 0x02000000; 56} 57 58static hb_script_t 59hb_ot_old_tag_to_script (hb_tag_t tag) 60{ 61 switch (tag) { 62 case HB_TAG('c','o','p','t'): return HB_SCRIPT_COPTIC; 63 case HB_TAG('k','a','n','a'): return HB_SCRIPT_HIRAGANA; 64 case HB_TAG('l','a','o',' '): return HB_SCRIPT_LAO; 65 case HB_TAG('y','i',' ',' '): return HB_SCRIPT_YI; 66 /* Unicode-5.0 additions */ 67 case HB_TAG('n','k','o',' '): return HB_SCRIPT_NKO; 68 /* Unicode-5.1 additions */ 69 case HB_TAG('v','a','i',' '): return HB_SCRIPT_VAI; 70 /* Unicode-5.2 additions */ 71 case HB_TAG('m','y','e','i'): return HB_SCRIPT_MEETEI_MAYEK; 72 /* Unicode-6.0 additions */ 73 } 74 75 /* Else, just change first char to uppercase and return */ 76 return (hb_script_t) (tag & ~0x02000000); 77} 78 79static hb_tag_t 80hb_ot_new_tag_from_script (hb_script_t script) 81{ 82 switch ((hb_tag_t) script) { 83 case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2'); 84 case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2'); 85 case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2'); 86 case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2'); 
87 case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2'); 88 case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2'); 89 case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2'); 90 case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2'); 91 case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2'); 92 } 93 94 return HB_TAG_NONE; 95} 96 97static hb_script_t 98hb_ot_new_tag_to_script (hb_tag_t tag) 99{ 100 switch (tag) { 101 case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI; 102 case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI; 103 case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI; 104 case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI; 105 case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA; 106 case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM; 107 case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA; 108 case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL; 109 case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU; 110 } 111 112 return HB_SCRIPT_UNKNOWN; 113} 114 115/* 116 * Complete list at: 117 * http://www.microsoft.com/typography/otspec/scripttags.htm 118 * 119 * Most of the script tags are the same as the ISO 15924 tag but lowercased. 120 * So we just do that, and handle the exceptional cases in a switch. 
121 */ 122 123void 124hb_ot_tags_from_script (hb_script_t script, 125 hb_tag_t *script_tag_1, 126 hb_tag_t *script_tag_2) 127{ 128 hb_tag_t new_tag; 129 130 *script_tag_2 = HB_TAG_NONE; 131 *script_tag_1 = hb_ot_old_tag_from_script (script); 132 133 new_tag = hb_ot_new_tag_from_script (script); 134 if (unlikely (new_tag != HB_TAG_NONE)) { 135 *script_tag_2 = *script_tag_1; 136 *script_tag_1 = new_tag; 137 } 138} 139 140hb_script_t 141hb_ot_tag_to_script (hb_tag_t tag) 142{ 143 if (unlikely ((tag & 0x000000FF) == '2')) 144 return hb_ot_new_tag_to_script (tag); 145 146 return hb_ot_old_tag_to_script (tag); 147} 148 149 150/* hb_language_t */ 151 152typedef struct { 153 char language[6]; 154 hb_tag_t tag; 155} LangTag; 156 157/* 158 * Complete list at: 159 * http://www.microsoft.com/typography/otspec/languagetags.htm 160 * 161 * Generated by intersecting the OpenType language tag list from 162 * Draft OpenType 1.5 spec, with with the ISO 639-3 codes from 163 * 2008/08/04, matching on name, and finally adjusted manually. 164 * 165 * Many items still missing. Those are commented out at the end. 166 * Keep sorted for bsearch. 
167 */ 168static const LangTag ot_languages[] = { 169 {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ 170 {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ 171 {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ 172 {"ady", HB_TAG('A','D','Y',' ')}, /* Adyghe */ 173 {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ 174 {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ 175 {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ 176 {"ar", HB_TAG('A','R','A',' ')}, /* Arabic */ 177 {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ 178 {"as", HB_TAG('A','S','M',' ')}, /* Assamese */ 179 {"av", HB_TAG('A','V','R',' ')}, /* Avaric */ 180 {"awa", HB_TAG('A','W','A',' ')}, /* Awadhi */ 181 {"ay", HB_TAG('A','Y','M',' ')}, /* Aymara */ 182 {"az", HB_TAG('A','Z','E',' ')}, /* Azerbaijani */ 183 {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */ 184 {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi */ 185 {"bcq", HB_TAG('B','C','H',' ')}, /* Bench */ 186 {"bem", HB_TAG('B','E','M',' ')}, /* Bemba (Zambia) */ 187 {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */ 188 {"bft", HB_TAG('B','L','T',' ')}, /* Balti */ 189 {"bg", HB_TAG('B','G','R',' ')}, /* Bulgarian */ 190 {"bhb", HB_TAG('B','H','I',' ')}, /* Bhili */ 191 {"bho", HB_TAG('B','H','O',' ')}, /* Bhojpuri */ 192 {"bik", HB_TAG('B','I','K',' ')}, /* Bikol */ 193 {"bin", HB_TAG('E','D','O',' ')}, /* Bini */ 194 {"bm", HB_TAG('B','M','B',' ')}, /* Bambara */ 195 {"bn", HB_TAG('B','E','N',' ')}, /* Bengali */ 196 {"bo", HB_TAG('T','I','B',' ')}, /* Tibetan */ 197 {"br", HB_TAG('B','R','E',' ')}, /* Breton */ 198 {"brh", HB_TAG('B','R','H',' ')}, /* Brahui */ 199 {"bs", HB_TAG('B','O','S',' ')}, /* Bosnian */ 200 {"btb", HB_TAG('B','T','I',' ')}, /* Beti (Cameroon) */ 201 {"ca", HB_TAG('C','A','T',' ')}, /* Catalan */ 202 {"ce", HB_TAG('C','H','E',' ')}, /* Chechen */ 203 {"ceb", HB_TAG('C','E','B',' ')}, /* Cebuano */ 204 {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */ 205 {"chr", HB_TAG('C','H','R',' ')}, /* Cherokee */ 206 {"cop", HB_TAG('C','O','P',' ')}, 
/* Coptic */ 207 {"cr", HB_TAG('C','R','E',' ')}, /* Cree */ 208 {"crh", HB_TAG('C','R','T',' ')}, /* Crimean Tatar */ 209 {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */ 210 {"crx", HB_TAG('C','R','R',' ')}, /* Carrier */ 211 {"cs", HB_TAG('C','S','Y',' ')}, /* Czech */ 212 {"cu", HB_TAG('C','S','L',' ')}, /* Church Slavic */ 213 {"cv", HB_TAG('C','H','U',' ')}, /* Chuvash */ 214 {"cwd", HB_TAG('D','C','R',' ')}, /* Woods Cree */ 215 {"cy", HB_TAG('W','E','L',' ')}, /* Welsh */ 216 {"da", HB_TAG('D','A','N',' ')}, /* Danish */ 217 {"dap", HB_TAG('N','I','S',' ')}, /* Nisi (India) */ 218 {"dar", HB_TAG('D','A','R',' ')}, /* Dargwa */ 219 {"de", HB_TAG('D','E','U',' ')}, /* German */ 220 {"din", HB_TAG('D','N','K',' ')}, /* Dinka */ 221 {"dng", HB_TAG('D','U','N',' ')}, /* Dungan */ 222 {"doi", HB_TAG('D','G','R',' ')}, /* Dogri */ 223 {"dsb", HB_TAG('L','S','B',' ')}, /* Lower Sorbian */ 224 {"dv", HB_TAG('D','I','V',' ')}, /* Dhivehi */ 225 {"dz", HB_TAG('D','Z','N',' ')}, /* Dzongkha */ 226 {"ee", HB_TAG('E','W','E',' ')}, /* Ewe */ 227 {"efi", HB_TAG('E','F','I',' ')}, /* Efik */ 228 {"el", HB_TAG('E','L','L',' ')}, /* Modern Greek (1453-) */ 229 {"en", HB_TAG('E','N','G',' ')}, /* English */ 230 {"eo", HB_TAG('N','T','O',' ')}, /* Esperanto */ 231 {"eot", HB_TAG('B','T','I',' ')}, /* Beti (Côte d'Ivoire) */ 232 {"es", HB_TAG('E','S','P',' ')}, /* Spanish */ 233 {"et", HB_TAG('E','T','I',' ')}, /* Estonian */ 234 {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */ 235 {"eve", HB_TAG('E','V','N',' ')}, /* Even */ 236 {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */ 237 {"fa", HB_TAG('F','A','R',' ')}, /* Persian */ 238 {"ff", HB_TAG('F','U','L',' ')}, /* Fulah */ 239 {"fi", HB_TAG('F','I','N',' ')}, /* Finnish */ 240 {"fil", HB_TAG('P','I','L',' ')}, /* Filipino */ 241 {"fj", HB_TAG('F','J','I',' ')}, /* Fijian */ 242 {"fo", HB_TAG('F','O','S',' ')}, /* Faroese */ 243 {"fon", HB_TAG('F','O','N',' ')}, /* Fon */ 244 {"fr", HB_TAG('F','R','A',' ')}, /* French */ 245 
{"fur", HB_TAG('F','R','L',' ')}, /* Friulian */ 246 {"fy", HB_TAG('F','R','I',' ')}, /* Western Frisian */ 247 {"ga", HB_TAG('I','R','I',' ')}, /* Irish */ 248 {"gaa", HB_TAG('G','A','D',' ')}, /* Ga */ 249 {"gag", HB_TAG('G','A','G',' ')}, /* Gagauz */ 250 {"gbm", HB_TAG('G','A','W',' ')}, /* Garhwali */ 251 {"gd", HB_TAG('G','A','E',' ')}, /* Scottish Gaelic */ 252 {"gl", HB_TAG('G','A','L',' ')}, /* Galician */ 253 {"gld", HB_TAG('N','A','N',' ')}, /* Nanai */ 254 {"gn", HB_TAG('G','U','A',' ')}, /* Guarani */ 255 {"gon", HB_TAG('G','O','N',' ')}, /* Gondi */ 256 {"grt", HB_TAG('G','R','O',' ')}, /* Garo */ 257 {"gu", HB_TAG('G','U','J',' ')}, /* Gujarati */ 258 {"guk", HB_TAG('G','M','Z',' ')}, /* Gumuz */ 259 {"gv", HB_TAG('M','N','X',' ')}, /* Manx Gaelic */ 260 {"ha", HB_TAG('H','A','U',' ')}, /* Hausa */ 261 {"har", HB_TAG('H','R','I',' ')}, /* Harari */ 262 {"he", HB_TAG('I','W','R',' ')}, /* Hebrew */ 263 {"hi", HB_TAG('H','I','N',' ')}, /* Hindi */ 264 {"hil", HB_TAG('H','I','L',' ')}, /* Hiligaynon */ 265 {"hoc", HB_TAG('H','O',' ',' ')}, /* Ho */ 266 {"hr", HB_TAG('H','R','V',' ')}, /* Croatian */ 267 {"hsb", HB_TAG('U','S','B',' ')}, /* Upper Sorbian */ 268 {"ht", HB_TAG('H','A','I',' ')}, /* Haitian */ 269 {"hu", HB_TAG('H','U','N',' ')}, /* Hungarian */ 270 {"hy", HB_TAG('H','Y','E',' ')}, /* Armenian */ 271 {"id", HB_TAG('I','N','D',' ')}, /* Indonesian */ 272 {"ig", HB_TAG('I','B','O',' ')}, /* Igbo */ 273 {"igb", HB_TAG('E','B','I',' ')}, /* Ebira */ 274 {"inh", HB_TAG('I','N','G',' ')}, /* Ingush */ 275 {"is", HB_TAG('I','S','L',' ')}, /* Icelandic */ 276 {"it", HB_TAG('I','T','A',' ')}, /* Italian */ 277 {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut */ 278 {"ja", HB_TAG('J','A','N',' ')}, /* Japanese */ 279 {"jv", HB_TAG('J','A','V',' ')}, /* Javanese */ 280 {"ka", HB_TAG('K','A','T',' ')}, /* Georgian */ 281 {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */ 282 {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */ 283 {"kdr", 
HB_TAG('K','R','M',' ')}, /* Karaim */ 284 {"kdt", HB_TAG('K','U','Y',' ')}, /* Kuy */ 285 {"kfr", HB_TAG('K','A','C',' ')}, /* Kachchi */ 286 {"kfy", HB_TAG('K','M','N',' ')}, /* Kumaoni */ 287 {"kha", HB_TAG('K','S','I',' ')}, /* Khasi */ 288 {"khw", HB_TAG('K','H','W',' ')}, /* Khowar */ 289 {"ki", HB_TAG('K','I','K',' ')}, /* Kikuyu */ 290 {"kk", HB_TAG('K','A','Z',' ')}, /* Kazakh */ 291 {"kl", HB_TAG('G','R','N',' ')}, /* Kalaallisut */ 292 {"kln", HB_TAG('K','A','L',' ')}, /* Kalenjin */ 293 {"km", HB_TAG('K','H','M',' ')}, /* Central Khmer */ 294 {"kmw", HB_TAG('K','M','O',' ')}, /* Komo (Democratic Republic of Congo) */ 295 {"kn", HB_TAG('K','A','N',' ')}, /* Kannada */ 296 {"ko", HB_TAG('K','O','R',' ')}, /* Korean */ 297 {"koi", HB_TAG('K','O','P',' ')}, /* Komi-Permyak */ 298 {"kok", HB_TAG('K','O','K',' ')}, /* Konkani */ 299 {"kpe", HB_TAG('K','P','L',' ')}, /* Kpelle */ 300 {"kpv", HB_TAG('K','O','Z',' ')}, /* Komi-Zyrian */ 301 {"kpy", HB_TAG('K','Y','K',' ')}, /* Koryak */ 302 {"kqy", HB_TAG('K','R','T',' ')}, /* Koorete */ 303 {"kr", HB_TAG('K','N','R',' ')}, /* Kanuri */ 304 {"kri", HB_TAG('K','R','I',' ')}, /* Krio */ 305 {"krl", HB_TAG('K','R','L',' ')}, /* Karelian */ 306 {"kru", HB_TAG('K','U','U',' ')}, /* Kurukh */ 307 {"ks", HB_TAG('K','S','H',' ')}, /* Kashmiri */ 308 {"ku", HB_TAG('K','U','R',' ')}, /* Kurdish */ 309 {"kum", HB_TAG('K','U','M',' ')}, /* Kumyk */ 310 {"kvd", HB_TAG('K','U','I',' ')}, /* Kui (Indonesia) */ 311 {"kxu", HB_TAG('K','U','I',' ')}, /* Kui (India) */ 312 {"ky", HB_TAG('K','I','R',' ')}, /* Kirghiz */ 313 {"la", HB_TAG('L','A','T',' ')}, /* Latin */ 314 {"lad", HB_TAG('J','U','D',' ')}, /* Ladino */ 315 {"lb", HB_TAG('L','T','Z',' ')}, /* Luxembourgish */ 316 {"lbe", HB_TAG('L','A','K',' ')}, /* Lak */ 317 {"lbj", HB_TAG('L','D','K',' ')}, /* Ladakhi */ 318 {"lif", HB_TAG('L','M','B',' ')}, /* Limbu */ 319 {"lld", HB_TAG('L','A','D',' ')}, /* Ladin */ 320 {"ln", HB_TAG('L','I','N',' ')}, /* Lingala */ 321 {"lo", 
HB_TAG('L','A','O',' ')}, /* Lao */ 322 {"lt", HB_TAG('L','T','H',' ')}, /* Lithuanian */ 323 {"luo", HB_TAG('L','U','O',' ')}, /* Luo (Kenya and Tanzania) */ 324 {"luw", HB_TAG('L','U','O',' ')}, /* Luo (Cameroon) */ 325 {"lv", HB_TAG('L','V','I',' ')}, /* Latvian */ 326 {"lzz", HB_TAG('L','A','Z',' ')}, /* Laz */ 327 {"mai", HB_TAG('M','T','H',' ')}, /* Maithili */ 328 {"mdc", HB_TAG('M','L','E',' ')}, /* Male (Papua New Guinea) */ 329 {"mdf", HB_TAG('M','O','K',' ')}, /* Moksha */ 330 {"mdy", HB_TAG('M','L','E',' ')}, /* Male (Ethiopia) */ 331 {"men", HB_TAG('M','D','E',' ')}, /* Mende (Sierra Leone) */ 332 {"mg", HB_TAG('M','L','G',' ')}, /* Malagasy */ 333 {"mi", HB_TAG('M','R','I',' ')}, /* Maori */ 334 {"mk", HB_TAG('M','K','D',' ')}, /* Macedonian */ 335 {"ml", HB_TAG('M','L','R',' ')}, /* Malayalam */ 336 {"mn", HB_TAG('M','N','G',' ')}, /* Mongolian */ 337 {"mnc", HB_TAG('M','C','H',' ')}, /* Manchu */ 338 {"mni", HB_TAG('M','N','I',' ')}, /* Manipuri */ 339 {"mnk", HB_TAG('M','N','D',' ')}, /* Mandinka */ 340 {"mns", HB_TAG('M','A','N',' ')}, /* Mansi */ 341 {"mnw", HB_TAG('M','O','N',' ')}, /* Mon */ 342 {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian */ 343 {"moh", HB_TAG('M','O','H',' ')}, /* Mohawk */ 344 {"mpe", HB_TAG('M','A','J',' ')}, /* Majang */ 345 {"mr", HB_TAG('M','A','R',' ')}, /* Marathi */ 346 {"ms", HB_TAG('M','L','Y',' ')}, /* Malay */ 347 {"mt", HB_TAG('M','T','S',' ')}, /* Maltese */ 348 {"mwr", HB_TAG('M','A','W',' ')}, /* Marwari */ 349 {"my", HB_TAG('B','R','M',' ')}, /* Burmese */ 350 {"mym", HB_TAG('M','E','N',' ')}, /* Me'en */ 351 {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */ 352 {"nb", HB_TAG('N','O','R',' ')}, /* Norwegian Bokmål */ 353 {"nco", HB_TAG('S','I','B',' ')}, /* Sibe */ 354 {"ne", HB_TAG('N','E','P',' ')}, /* Nepali */ 355 {"new", HB_TAG('N','E','W',' ')}, /* Newari */ 356 {"ng", HB_TAG('N','D','G',' ')}, /* Ndonga */ 357 {"ngl", HB_TAG('L','M','W',' ')}, /* Lomwe */ 358 {"niu", HB_TAG('N','I','U',' ')}, /* Niuean */ 
359 {"niv", HB_TAG('G','I','L',' ')}, /* Gilyak */ 360 {"nl", HB_TAG('N','L','D',' ')}, /* Dutch */ 361 {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk */ 362 {"no", HB_TAG('N','O','R',' ')}, /* Norwegian (deprecated) */ 363 {"nog", HB_TAG('N','O','G',' ')}, /* Nogai */ 364 {"nqo", HB_TAG('N','K','O',' ')}, /* N'Ko */ 365 {"nsk", HB_TAG('N','A','S',' ')}, /* Naskapi */ 366 {"ny", HB_TAG('C','H','I',' ')}, /* Nyanja */ 367 {"oc", HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */ 368 {"oj", HB_TAG('O','J','B',' ')}, /* Ojibwa */ 369 {"om", HB_TAG('O','R','O',' ')}, /* Oromo */ 370 {"or", HB_TAG('O','R','I',' ')}, /* Oriya */ 371 {"os", HB_TAG('O','S','S',' ')}, /* Ossetian */ 372 {"pa", HB_TAG('P','A','N',' ')}, /* Panjabi */ 373 {"pi", HB_TAG('P','A','L',' ')}, /* Pali */ 374 {"pl", HB_TAG('P','L','K',' ')}, /* Polish */ 375 {"plp", HB_TAG('P','A','P',' ')}, /* Palpa */ 376 {"prs", HB_TAG('D','R','I',' ')}, /* Dari */ 377 {"ps", HB_TAG('P','A','S',' ')}, /* Pushto */ 378 {"pt", HB_TAG('P','T','G',' ')}, /* Portuguese */ 379 {"raj", HB_TAG('R','A','J',' ')}, /* Rajasthani */ 380 {"ria", HB_TAG('R','I','A',' ')}, /* Riang (India) */ 381 {"ril", HB_TAG('R','I','A',' ')}, /* Riang (Myanmar) */ 382 {"ro", HB_TAG('R','O','M',' ')}, /* Romanian */ 383 {"rom", HB_TAG('R','O','Y',' ')}, /* Romany */ 384 {"ru", HB_TAG('R','U','S',' ')}, /* Russian */ 385 {"rue", HB_TAG('R','S','Y',' ')}, /* Rusyn */ 386 {"sa", HB_TAG('S','A','N',' ')}, /* Sanskrit */ 387 {"sah", HB_TAG('Y','A','K',' ')}, /* Yakut */ 388 {"sat", HB_TAG('S','A','T',' ')}, /* Santali */ 389 {"sck", HB_TAG('S','A','D',' ')}, /* Sadri */ 390 {"sd", HB_TAG('S','N','D',' ')}, /* Sindhi */ 391 {"se", HB_TAG('N','S','M',' ')}, /* Northern Sami */ 392 {"seh", HB_TAG('S','N','A',' ')}, /* Sena */ 393 {"sel", HB_TAG('S','E','L',' ')}, /* Selkup */ 394 {"sg", HB_TAG('S','G','O',' ')}, /* Sango */ 395 {"shn", HB_TAG('S','H','N',' ')}, /* Shan */ 396 {"si", HB_TAG('S','N','H',' ')}, /* Sinhala */ 397 {"sid", 
HB_TAG('S','I','D',' ')}, /* Sidamo */ 398 {"sjd", HB_TAG('K','S','M',' ')}, /* Kildin Sami */ 399 {"sk", HB_TAG('S','K','Y',' ')}, /* Slovak */ 400 {"skr", HB_TAG('S','R','K',' ')}, /* Seraiki */ 401 {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */ 402 {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */ 403 {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */ 404 {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */ 405 {"smn", HB_TAG('I','S','M',' ')}, /* Inari Sami */ 406 {"sms", HB_TAG('S','K','S',' ')}, /* Skolt Sami */ 407 {"snk", HB_TAG('S','N','K',' ')}, /* Soninke */ 408 {"so", HB_TAG('S','M','L',' ')}, /* Somali */ 409 {"sq", HB_TAG('S','Q','I',' ')}, /* Albanian */ 410 {"sr", HB_TAG('S','R','B',' ')}, /* Serbian */ 411 {"srr", HB_TAG('S','R','R',' ')}, /* Serer */ 412 {"suq", HB_TAG('S','U','R',' ')}, /* Suri */ 413 {"sv", HB_TAG('S','V','E',' ')}, /* Swedish */ 414 {"sva", HB_TAG('S','V','A',' ')}, /* Svan */ 415 {"sw", HB_TAG('S','W','K',' ')}, /* Swahili */ 416 {"swb", HB_TAG('C','M','R',' ')}, /* Comorian */ 417 {"syr", HB_TAG('S','Y','R',' ')}, /* Syriac */ 418 {"ta", HB_TAG('T','A','M',' ')}, /* Tamil */ 419 {"tcy", HB_TAG('T','U','L',' ')}, /* Tulu */ 420 {"te", HB_TAG('T','E','L',' ')}, /* Telugu */ 421 {"tg", HB_TAG('T','A','J',' ')}, /* Tajik */ 422 {"th", HB_TAG('T','H','A',' ')}, /* Thai */ 423 {"ti", HB_TAG('T','G','Y',' ')}, /* Tigrinya */ 424 {"tig", HB_TAG('T','G','R',' ')}, /* Tigre */ 425 {"tk", HB_TAG('T','K','M',' ')}, /* Turkmen */ 426 {"tn", HB_TAG('T','N','A',' ')}, /* Tswana */ 427 {"tnz", HB_TAG('T','N','G',' ')}, /* Tonga (Thailand) */ 428 {"to", HB_TAG('T','N','G',' ')}, /* Tonga (Tonga Islands) */ 429 {"tog", HB_TAG('T','N','G',' ')}, /* Tonga (Nyasa) */ 430 {"toi", HB_TAG('T','N','G',' ')}, /* Tonga (Zambia) */ 431 {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */ 432 {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */ 433 {"tt", HB_TAG('T','A','T',' ')}, /* Tatar */ 434 {"tw", HB_TAG('T','W','I',' ')}, /* Twi */ 435 {"ty", 
HB_TAG('T','H','T',' ')}, /* Tahitian */ 436 {"udm", HB_TAG('U','D','M',' ')}, /* Udmurt */ 437 {"ug", HB_TAG('U','Y','G',' ')}, /* Uighur */ 438 {"uk", HB_TAG('U','K','R',' ')}, /* Ukrainian */ 439 {"unr", HB_TAG('M','U','N',' ')}, /* Mundari */ 440 {"ur", HB_TAG('U','R','D',' ')}, /* Urdu */ 441 {"uz", HB_TAG('U','Z','B',' ')}, /* Uzbek */ 442 {"ve", HB_TAG('V','E','N',' ')}, /* Venda */ 443 {"vi", HB_TAG('V','I','T',' ')}, /* Vietnamese */ 444 {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ 445 {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ 446 {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ 447 {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ 448 {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ 449 {"xom", HB_TAG('K','M','O',' ')}, /* Komo (Sudan) */ 450 {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ 451 {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ 452 {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ 453 {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ 454 {"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */ 455 {"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */ 456 {"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */ 457 {"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */ 458 {"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */ 459 {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ 460 {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ 461 462 /* I couldn't find the language id for these */ 463 464/*{"??", HB_TAG('A','G','W',' ')},*/ /* Agaw */ 465/*{"??", HB_TAG('A','L','S',' ')},*/ /* Alsatian */ 466/*{"??", HB_TAG('A','L','T',' ')},*/ /* Altai */ 467/*{"??", HB_TAG('A','R','K',' ')},*/ /* Arakanese */ 468/*{"??", HB_TAG('A','T','H',' ')},*/ /* Athapaskan */ 469/*{"??", HB_TAG('B','A','G',' ')},*/ /* Baghelkhandi */ 470/*{"??", HB_TAG('B','A','L',' ')},*/ /* Balkar */ 471/*{"??", HB_TAG('B','A','U',' ')},*/ /* Baule */ 472/*{"??", HB_TAG('B','B','R',' ')},*/ /* Berber */ 473/*{"??", HB_TAG('B','C','R',' ')},*/ /* Bible Cree */ 474/*{"??", 
HB_TAG('B','E','L',' ')},*/ /* Belarussian */ 475/*{"??", HB_TAG('B','I','L',' ')},*/ /* Bilen */ 476/*{"??", HB_TAG('B','K','F',' ')},*/ /* Blackfoot */ 477/*{"??", HB_TAG('B','L','N',' ')},*/ /* Balante */ 478/*{"??", HB_TAG('B','M','L',' ')},*/ /* Bamileke */ 479/*{"??", HB_TAG('B','R','I',' ')},*/ /* Braj Bhasha */ 480/*{"??", HB_TAG('C','H','G',' ')},*/ /* Chaha Gurage */ 481/*{"??", HB_TAG('C','H','H',' ')},*/ /* Chattisgarhi */ 482/*{"??", HB_TAG('C','H','K',' ')},*/ /* Chukchi */ 483/*{"??", HB_TAG('D','J','R',' ')},*/ /* Djerma */ 484/*{"??", HB_TAG('D','N','G',' ')},*/ /* Dangme */ 485/*{"??", HB_TAG('E','C','R',' ')},*/ /* Eastern Cree */ 486/*{"??", HB_TAG('F','A','N',' ')},*/ /* French Antillean */ 487/*{"??", HB_TAG('F','L','E',' ')},*/ /* Flemish */ 488/*{"??", HB_TAG('F','N','E',' ')},*/ /* Forest Nenets */ 489/*{"??", HB_TAG('F','T','A',' ')},*/ /* Futa */ 490/*{"??", HB_TAG('G','A','R',' ')},*/ /* Garshuni */ 491/*{"??", HB_TAG('G','E','Z',' ')},*/ /* Ge'ez */ 492/*{"??", HB_TAG('H','A','L',' ')},*/ /* Halam */ 493/*{"??", HB_TAG('H','A','R',' ')},*/ /* Harauti */ 494/*{"??", HB_TAG('H','A','W',' ')},*/ /* Hawaiin */ 495/*{"??", HB_TAG('H','B','N',' ')},*/ /* Hammer-Banna */ 496/*{"??", HB_TAG('H','M','A',' ')},*/ /* High Mari */ 497/*{"??", HB_TAG('H','N','D',' ')},*/ /* Hindko */ 498/*{"??", HB_TAG('I','J','O',' ')},*/ /* Ijo */ 499/*{"??", HB_TAG('I','L','O',' ')},*/ /* Ilokano */ 500/*{"??", HB_TAG('I','R','T',' ')},*/ /* Irish Traditional */ 501/*{"??", HB_TAG('J','U','L',' ')},*/ /* Jula */ 502/*{"??", HB_TAG('K','A','R',' ')},*/ /* Karachay */ 503/*{"??", HB_TAG('K','E','B',' ')},*/ /* Kebena */ 504/*{"??", HB_TAG('K','G','E',' ')},*/ /* Khutsuri Georgian */ 505/*{"??", HB_TAG('K','H','A',' ')},*/ /* Khakass */ 506/*{"??", HB_TAG('K','H','K',' ')},*/ /* Khanty-Kazim */ 507/*{"??", HB_TAG('K','H','S',' ')},*/ /* Khanty-Shurishkar */ 508/*{"??", HB_TAG('K','H','V',' ')},*/ /* Khanty-Vakhi */ 509/*{"??", HB_TAG('K','I','S',' ')},*/ /* Kisii */ 
510/*{"??", HB_TAG('K','K','N',' ')},*/ /* Kokni */ 511/*{"??", HB_TAG('K','M','S',' ')},*/ /* Komso */ 512/*{"??", HB_TAG('K','O','D',' ')},*/ /* Kodagu */ 513/*{"??", HB_TAG('K','O','H',' ')},*/ /* Korean Old Hangul */ 514/*{"??", HB_TAG('K','O','N',' ')},*/ /* Kikongo */ 515/*{"??", HB_TAG('K','R','K',' ')},*/ /* Karakalpak */ 516/*{"??", HB_TAG('K','R','N',' ')},*/ /* Karen */ 517/*{"??", HB_TAG('K','U','L',' ')},*/ /* Kulvi */ 518/*{"??", HB_TAG('L','A','H',' ')},*/ /* Lahuli */ 519/*{"??", HB_TAG('L','A','M',' ')},*/ /* Lambani */ 520/*{"??", HB_TAG('L','C','R',' ')},*/ /* L-Cree */ 521/*{"??", HB_TAG('L','E','Z',' ')},*/ /* Lezgi */ 522/*{"??", HB_TAG('L','M','A',' ')},*/ /* Low Mari */ 523/*{"??", HB_TAG('L','U','B',' ')},*/ /* Luba */ 524/*{"??", HB_TAG('L','U','G',' ')},*/ /* Luganda */ 525/*{"??", HB_TAG('L','U','H',' ')},*/ /* Luhya */ 526/*{"??", HB_TAG('M','A','K',' ')},*/ /* Makua */ 527/*{"??", HB_TAG('M','A','L',' ')},*/ /* Malayalam Traditional */ 528/*{"??", HB_TAG('M','B','N',' ')},*/ /* Mbundu */ 529/*{"??", HB_TAG('M','I','Z',' ')},*/ /* Mizo */ 530/*{"??", HB_TAG('M','L','N',' ')},*/ /* Malinke */ 531/*{"??", HB_TAG('M','N','K',' ')},*/ /* Maninka */ 532/*{"??", HB_TAG('M','O','R',' ')},*/ /* Moroccan */ 533/*{"??", HB_TAG('N','A','G',' ')},*/ /* Naga-Assamese */ 534/*{"??", HB_TAG('N','C','R',' ')},*/ /* N-Cree */ 535/*{"??", HB_TAG('N','D','B',' ')},*/ /* Ndebele */ 536/*{"??", HB_TAG('N','G','R',' ')},*/ /* Nagari */ 537/*{"??", HB_TAG('N','H','C',' ')},*/ /* Norway House Cree */ 538/*{"??", HB_TAG('N','K','L',' ')},*/ /* Nkole */ 539/*{"??", HB_TAG('N','T','A',' ')},*/ /* Northern Tai */ 540/*{"??", HB_TAG('O','C','R',' ')},*/ /* Oji-Cree */ 541/*{"??", HB_TAG('P','A','A',' ')},*/ /* Palestinian Aramaic */ 542/*{"??", HB_TAG('P','G','R',' ')},*/ /* Polytonic Greek */ 543/*{"??", HB_TAG('P','L','G',' ')},*/ /* Palaung */ 544/*{"??", HB_TAG('Q','I','N',' ')},*/ /* Chin */ 545/*{"??", HB_TAG('R','B','U',' ')},*/ /* Russian Buriat */ 
546/*{"??", HB_TAG('R','C','R',' ')},*/ /* R-Cree */ 547/*{"??", HB_TAG('R','M','S',' ')},*/ /* Rhaeto-Romanic */ 548/*{"??", HB_TAG('R','U','A',' ')},*/ /* Ruanda */ 549/*{"??", HB_TAG('S','A','Y',' ')},*/ /* Sayisi */ 550/*{"??", HB_TAG('S','E','K',' ')},*/ /* Sekota */ 551/*{"??", HB_TAG('S','I','G',' ')},*/ /* Silte Gurage */ 552/*{"??", HB_TAG('S','L','A',' ')},*/ /* Slavey */ 553/*{"??", HB_TAG('S','O','G',' ')},*/ /* Sodo Gurage */ 554/*{"??", HB_TAG('S','O','T',' ')},*/ /* Sotho */ 555/*{"??", HB_TAG('S','W','A',' ')},*/ /* Swadaya Aramaic */ 556/*{"??", HB_TAG('S','W','Z',' ')},*/ /* Swazi */ 557/*{"??", HB_TAG('S','X','T',' ')},*/ /* Sutu */ 558/*{"??", HB_TAG('T','A','B',' ')},*/ /* Tabasaran */ 559/*{"??", HB_TAG('T','C','R',' ')},*/ /* TH-Cree */ 560/*{"??", HB_TAG('T','G','N',' ')},*/ /* Tongan */ 561/*{"??", HB_TAG('T','M','N',' ')},*/ /* Temne */ 562/*{"??", HB_TAG('T','N','E',' ')},*/ /* Tundra Nenets */ 563/*{"??", HB_TAG('T','O','D',' ')},*/ /* Todo */ 564/*{"??", HB_TAG('T','U','A',' ')},*/ /* Turoyo Aramaic */ 565/*{"??", HB_TAG('T','U','V',' ')},*/ /* Tuvin */ 566/*{"??", HB_TAG('W','C','R',' ')},*/ /* West-Cree */ 567/*{"??", HB_TAG('X','B','D',' ')},*/ /* Tai Lue */ 568/*{"??", HB_TAG('Y','C','R',' ')},*/ /* Y-Cree */ 569/*{"??", HB_TAG('Y','I','C',' ')},*/ /* Yi Classic */ 570/*{"??", HB_TAG('Y','I','M',' ')},*/ /* Yi Modern */ 571/*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ 572}; 573 574static int 575lang_compare_first_component (const char *a, 576 const char *b) 577{ 578 unsigned int da, db; 579 const char *p; 580 581 p = strstr (a, "-"); 582 da = p ? (unsigned int) (p - a) : strlen (a); 583 584 p = strstr (b, "-"); 585 db = p ? 
(unsigned int) (p - b) : strlen (b); 586 587 return strncmp (a, b, MAX (da, db)); 588} 589 590static hb_bool_t 591lang_matches (const char *lang_str, const char *spec) 592{ 593 unsigned int len = strlen (spec); 594 595 return lang_str && strncmp (lang_str, spec, len) == 0 && 596 (lang_str[len] == '\0' || lang_str[len] == '-'); 597} 598 599hb_tag_t 600hb_ot_tag_from_language (hb_language_t language) 601{ 602 const char *lang_str; 603 LangTag *lang_tag; 604 605 if (language == NULL) 606 return HB_OT_TAG_DEFAULT_LANGUAGE; 607 608 lang_str = hb_language_to_string (language); 609 610 if (0 == strncmp (lang_str, "x-hbot", 6)) { 611 char tag[4]; 612 int i; 613 lang_str += 6; 614 for (i = 0; i < 4 && ISALPHA (lang_str[i]); i++) 615 tag[i] = TOUPPER (lang_str[i]); 616 for (; i < 4; i++) 617 tag[i] = ' '; 618 return HB_TAG_CHAR4 (tag); 619 } 620 621 /* find a language matching in the first component */ 622 lang_tag = bsearch (lang_str, ot_languages, 623 ARRAY_LENGTH (ot_languages), sizeof (LangTag), 624 (hb_compare_func_t) lang_compare_first_component); 625 626 /* we now need to find the best language matching */ 627 if (lang_tag) 628 { 629 hb_bool_t found = FALSE; 630 631 /* go to the final one matching in the first component */ 632 while (lang_tag + 1 < ot_languages + ARRAY_LENGTH (ot_languages) && 633 lang_compare_first_component (lang_str, (lang_tag + 1)->language) == 0) 634 lang_tag++; 635 636 /* go back, find which one matches completely */ 637 while (lang_tag >= ot_languages && 638 lang_compare_first_component (lang_str, lang_tag->language) == 0) 639 { 640 if (lang_matches (lang_str, lang_tag->language)) { 641 found = TRUE; 642 break; 643 } 644 645 lang_tag--; 646 } 647 648 if (!found) 649 lang_tag = NULL; 650 } 651 652 if (lang_tag) 653 return lang_tag->tag; 654 655 return HB_OT_TAG_DEFAULT_LANGUAGE; 656} 657 658hb_language_t 659hb_ot_tag_to_language (hb_tag_t tag) 660{ 661 unsigned int i; 662 unsigned char buf[11] = "x-hbot"; 663 664 for (i = 0; i < ARRAY_LENGTH 
(ot_languages); i++) 665 if (ot_languages[i].tag == tag) 666 return hb_language_from_string (ot_languages[i].language); 667 668 buf[6] = tag >> 24; 669 buf[7] = (tag >> 16) & 0xFF; 670 buf[8] = (tag >> 8) & 0xFF; 671 buf[9] = tag & 0xFF; 672 buf[10] = '\0'; 673 return hb_language_from_string ((char *) buf); 674} 675 676 677HB_END_DECLS 678