hb-ot-tag.cc revision 62879eebd9965179af8602ba29ac0a64a739b757
1/*
2 * Copyright (C) 2009  Red Hat, Inc.
3 *
4 *  This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Red Hat Author(s): Behdad Esfahbod
25 */
26
27#include "hb-private.h"
28#include "hb-ot.h"
29
30#include <string.h>
31
32HB_BEGIN_DECLS
33
34
35/* hb_script_t */
36
37static hb_tag_t
38hb_ot_old_tag_from_script (hb_script_t script)
39{
40  switch ((hb_tag_t) script) {
41    case HB_SCRIPT_COPTIC:		return HB_TAG('c','o','p','t');
42    case HB_SCRIPT_HIRAGANA:		return HB_TAG('k','a','n','a');
43    case HB_SCRIPT_LAO:			return HB_TAG('l','a','o',' ');
44    case HB_SCRIPT_YI:			return HB_TAG('y','i',' ',' ');
45    /* Unicode-5.0 additions */
46    case HB_SCRIPT_NKO:			return HB_TAG('n','k','o',' ');
47    /* Unicode-5.1 additions */
48    case HB_SCRIPT_VAI:			return HB_TAG('v','a','i',' ');
49    /* Unicode-5.2 additions */
50    case HB_SCRIPT_MEETEI_MAYEK:	return HB_TAG('m','y','e','i');
51    /* Unicode-6.0 additions */
52  }
53
54  /* Else, just change first char to lowercase and return */
55  return ((hb_tag_t) script) | 0x02000000;
56}
57
58static hb_script_t
59hb_ot_old_tag_to_script (hb_tag_t tag)
60{
61  switch (tag) {
62    case HB_TAG('c','o','p','t'):	return HB_SCRIPT_COPTIC;
63    case HB_TAG('k','a','n','a'):	return HB_SCRIPT_HIRAGANA;
64    case HB_TAG('l','a','o',' '):	return HB_SCRIPT_LAO;
65    case HB_TAG('y','i',' ',' '):	return HB_SCRIPT_YI;
66    /* Unicode-5.0 additions */
67    case HB_TAG('n','k','o',' '):	return HB_SCRIPT_NKO;
68    /* Unicode-5.1 additions */
69    case HB_TAG('v','a','i',' '):	return HB_SCRIPT_VAI;
70    /* Unicode-5.2 additions */
71    case HB_TAG('m','y','e','i'):	return HB_SCRIPT_MEETEI_MAYEK;
72    /* Unicode-6.0 additions */
73  }
74
75  /* Else, just change first char to uppercase and return */
76  return (hb_script_t) (tag & ~0x02000000);
77}
78
79static hb_tag_t
80hb_ot_new_tag_from_script (hb_script_t script)
81{
82  switch ((hb_tag_t) script) {
83    case HB_SCRIPT_BENGALI:		return HB_TAG('b','n','g','2');
84    case HB_SCRIPT_DEVANAGARI:		return HB_TAG('d','e','v','2');
85    case HB_SCRIPT_GUJARATI:		return HB_TAG('g','j','r','2');
86    case HB_SCRIPT_GURMUKHI:		return HB_TAG('g','u','r','2');
87    case HB_SCRIPT_KANNADA:		return HB_TAG('k','n','d','2');
88    case HB_SCRIPT_MALAYALAM:		return HB_TAG('m','l','m','2');
89    case HB_SCRIPT_ORIYA:		return HB_TAG('o','r','y','2');
90    case HB_SCRIPT_TAMIL:		return HB_TAG('t','m','l','2');
91    case HB_SCRIPT_TELUGU:		return HB_TAG('t','e','l','2');
92  }
93
94  return HB_TAG_NONE;
95}
96
97static hb_script_t
98hb_ot_new_tag_to_script (hb_tag_t tag)
99{
100  switch (tag) {
101    case HB_TAG('b','n','g','2'):	return HB_SCRIPT_BENGALI;
102    case HB_TAG('d','e','v','2'):	return HB_SCRIPT_DEVANAGARI;
103    case HB_TAG('g','j','r','2'):	return HB_SCRIPT_GUJARATI;
104    case HB_TAG('g','u','r','2'):	return HB_SCRIPT_GURMUKHI;
105    case HB_TAG('k','n','d','2'):	return HB_SCRIPT_KANNADA;
106    case HB_TAG('m','l','m','2'):	return HB_SCRIPT_MALAYALAM;
107    case HB_TAG('o','r','y','2'):	return HB_SCRIPT_ORIYA;
108    case HB_TAG('t','m','l','2'):	return HB_SCRIPT_TAMIL;
109    case HB_TAG('t','e','l','2'):	return HB_SCRIPT_TELUGU;
110  }
111
112  return HB_SCRIPT_UNKNOWN;
113}
114
115/*
116 * Complete list at:
117 * http://www.microsoft.com/typography/otspec/scripttags.htm
118 *
119 * Most of the script tags are the same as the ISO 15924 tag but lowercased.
120 * So we just do that, and handle the exceptional cases in a switch.
121 */
122
123void
124hb_ot_tags_from_script (hb_script_t  script,
125			hb_tag_t    *script_tag_1,
126			hb_tag_t    *script_tag_2)
127{
128  hb_tag_t new_tag;
129
130  *script_tag_2 = HB_TAG_NONE;
131  *script_tag_1 = hb_ot_old_tag_from_script (script);
132
133  new_tag = hb_ot_new_tag_from_script (script);
134  if (unlikely (new_tag != HB_TAG_NONE)) {
135    *script_tag_2 = *script_tag_1;
136    *script_tag_1 = new_tag;
137  }
138}
139
140hb_script_t
141hb_ot_tag_to_script (hb_tag_t tag)
142{
143  if (unlikely ((tag & 0x000000FF) == '2'))
144    return hb_ot_new_tag_to_script (tag);
145
146  return hb_ot_old_tag_to_script (tag);
147}
148
149
150/* hb_language_t */
151
152typedef struct {
153  char language[6];
154  hb_tag_t tag;
155} LangTag;
156
157/*
158 * Complete list at:
159 * http://www.microsoft.com/typography/otspec/languagetags.htm
160 *
161 * Generated by intersecting the OpenType language tag list from
 * Draft OpenType 1.5 spec, with the ISO 639-3 codes from
163 * 2008/08/04, matching on name, and finally adjusted manually.
164 *
165 * Many items still missing.  Those are commented out at the end.
166 * Keep sorted for bsearch.
167 */
/*
 * Language code -> OpenType language system tag.
 *
 * hb_ot_tag_from_language() runs bsearch() over this array with
 * lang_compare_first_component() as the comparator, so the rows must stay
 * sorted by language code.  Rows that share a first component (the "zh-*"
 * group) must stay adjacent.  Several codes may carry the same tag;
 * hb_ot_tag_to_language() scans from the top and returns the first row
 * whose tag matches.
 */
static const LangTag ot_languages[] = {
  {"aa",	HB_TAG('A','F','R',' ')},	/* Afar */
  {"ab",	HB_TAG('A','B','K',' ')},	/* Abkhazian */
  {"abq",	HB_TAG('A','B','A',' ')},	/* Abaza */
  {"ady",	HB_TAG('A','D','Y',' ')},	/* Adyghe */
  {"af",	HB_TAG('A','F','K',' ')},	/* Afrikaans */
  {"aiw",	HB_TAG('A','R','I',' ')},	/* Aari */
  {"am",	HB_TAG('A','M','H',' ')},	/* Amharic */
  {"ar",	HB_TAG('A','R','A',' ')},	/* Arabic */
  {"arn",	HB_TAG('M','A','P',' ')},	/* Mapudungun */
  {"as",	HB_TAG('A','S','M',' ')},	/* Assamese */
  {"av",	HB_TAG('A','V','R',' ')},	/* Avaric */
  {"awa",	HB_TAG('A','W','A',' ')},	/* Awadhi */
  {"ay",	HB_TAG('A','Y','M',' ')},	/* Aymara */
  {"az",	HB_TAG('A','Z','E',' ')},	/* Azerbaijani */
  {"ba",	HB_TAG('B','S','H',' ')},	/* Bashkir */
  {"bal",	HB_TAG('B','L','I',' ')},	/* Baluchi */
  {"bcq",	HB_TAG('B','C','H',' ')},	/* Bench */
  {"bem",	HB_TAG('B','E','M',' ')},	/* Bemba (Zambia) */
  {"bfq",	HB_TAG('B','A','D',' ')},	/* Badaga */
  {"bft",	HB_TAG('B','L','T',' ')},	/* Balti */
  {"bg",	HB_TAG('B','G','R',' ')},	/* Bulgarian */
  {"bhb",	HB_TAG('B','H','I',' ')},	/* Bhili */
  {"bho",	HB_TAG('B','H','O',' ')},	/* Bhojpuri */
  {"bik",	HB_TAG('B','I','K',' ')},	/* Bikol */
  {"bin",	HB_TAG('E','D','O',' ')},	/* Bini */
  {"bm",	HB_TAG('B','M','B',' ')},	/* Bambara */
  {"bn",	HB_TAG('B','E','N',' ')},	/* Bengali */
  {"bo",	HB_TAG('T','I','B',' ')},	/* Tibetan */
  {"br",	HB_TAG('B','R','E',' ')},	/* Breton */
  {"brh",	HB_TAG('B','R','H',' ')},	/* Brahui */
  {"bs",	HB_TAG('B','O','S',' ')},	/* Bosnian */
  {"btb",	HB_TAG('B','T','I',' ')},	/* Beti (Cameroon) */
  {"ca",	HB_TAG('C','A','T',' ')},	/* Catalan */
  {"ce",	HB_TAG('C','H','E',' ')},	/* Chechen */
  {"ceb",	HB_TAG('C','E','B',' ')},	/* Cebuano */
  {"chp",	HB_TAG('C','H','P',' ')},	/* Chipewyan */
  {"chr",	HB_TAG('C','H','R',' ')},	/* Cherokee */
  {"cop",	HB_TAG('C','O','P',' ')},	/* Coptic */
  {"cr",	HB_TAG('C','R','E',' ')},	/* Cree */
  {"crh",	HB_TAG('C','R','T',' ')},	/* Crimean Tatar */
  {"crm",	HB_TAG('M','C','R',' ')},	/* Moose Cree */
  {"crx",	HB_TAG('C','R','R',' ')},	/* Carrier */
  {"cs",	HB_TAG('C','S','Y',' ')},	/* Czech */
  {"cu",	HB_TAG('C','S','L',' ')},	/* Church Slavic */
  {"cv",	HB_TAG('C','H','U',' ')},	/* Chuvash */
  {"cwd",	HB_TAG('D','C','R',' ')},	/* Woods Cree */
  {"cy",	HB_TAG('W','E','L',' ')},	/* Welsh */
  {"da",	HB_TAG('D','A','N',' ')},	/* Danish */
  {"dap",	HB_TAG('N','I','S',' ')},	/* Nisi (India) */
  {"dar",	HB_TAG('D','A','R',' ')},	/* Dargwa */
  {"de",	HB_TAG('D','E','U',' ')},	/* German */
  {"din",	HB_TAG('D','N','K',' ')},	/* Dinka */
  {"dng",	HB_TAG('D','U','N',' ')},	/* Dungan */
  {"doi",	HB_TAG('D','G','R',' ')},	/* Dogri */
  {"dsb",	HB_TAG('L','S','B',' ')},	/* Lower Sorbian */
  {"dv",	HB_TAG('D','I','V',' ')},	/* Dhivehi */
  {"dz",	HB_TAG('D','Z','N',' ')},	/* Dzongkha */
  {"ee",	HB_TAG('E','W','E',' ')},	/* Ewe */
  {"efi",	HB_TAG('E','F','I',' ')},	/* Efik */
  {"el",	HB_TAG('E','L','L',' ')},	/* Modern Greek (1453-) */
  {"en",	HB_TAG('E','N','G',' ')},	/* English */
  {"eo",	HB_TAG('N','T','O',' ')},	/* Esperanto */
  {"eot",	HB_TAG('B','T','I',' ')},	/* Beti (Côte d'Ivoire) */
  {"es",	HB_TAG('E','S','P',' ')},	/* Spanish */
  {"et",	HB_TAG('E','T','I',' ')},	/* Estonian */
  {"eu",	HB_TAG('E','U','Q',' ')},	/* Basque */
  {"eve",	HB_TAG('E','V','N',' ')},	/* Even */
  {"evn",	HB_TAG('E','V','K',' ')},	/* Evenki */
  {"fa",	HB_TAG('F','A','R',' ')},	/* Persian */
  {"ff",	HB_TAG('F','U','L',' ')},	/* Fulah */
  {"fi",	HB_TAG('F','I','N',' ')},	/* Finnish */
  {"fil",	HB_TAG('P','I','L',' ')},	/* Filipino */
  {"fj",	HB_TAG('F','J','I',' ')},	/* Fijian */
  {"fo",	HB_TAG('F','O','S',' ')},	/* Faroese */
  {"fon",	HB_TAG('F','O','N',' ')},	/* Fon */
  {"fr",	HB_TAG('F','R','A',' ')},	/* French */
  {"fur",	HB_TAG('F','R','L',' ')},	/* Friulian */
  {"fy",	HB_TAG('F','R','I',' ')},	/* Western Frisian */
  {"ga",	HB_TAG('I','R','I',' ')},	/* Irish */
  {"gaa",	HB_TAG('G','A','D',' ')},	/* Ga */
  {"gag",	HB_TAG('G','A','G',' ')},	/* Gagauz */
  {"gbm",	HB_TAG('G','A','W',' ')},	/* Garhwali */
  {"gd",	HB_TAG('G','A','E',' ')},	/* Scottish Gaelic */
  {"gl",	HB_TAG('G','A','L',' ')},	/* Galician */
  {"gld",	HB_TAG('N','A','N',' ')},	/* Nanai */
  {"gn",	HB_TAG('G','U','A',' ')},	/* Guarani */
  {"gon",	HB_TAG('G','O','N',' ')},	/* Gondi */
  {"grt",	HB_TAG('G','R','O',' ')},	/* Garo */
  {"gu",	HB_TAG('G','U','J',' ')},	/* Gujarati */
  {"guk",	HB_TAG('G','M','Z',' ')},	/* Gumuz */
  {"gv",	HB_TAG('M','N','X',' ')},	/* Manx Gaelic */
  {"ha",	HB_TAG('H','A','U',' ')},	/* Hausa */
  {"har",	HB_TAG('H','R','I',' ')},	/* Harari */
  {"he",	HB_TAG('I','W','R',' ')},	/* Hebrew */
  {"hi",	HB_TAG('H','I','N',' ')},	/* Hindi */
  {"hil",	HB_TAG('H','I','L',' ')},	/* Hiligaynon */
  {"hoc",	HB_TAG('H','O',' ',' ')},	/* Ho */
  {"hr",	HB_TAG('H','R','V',' ')},	/* Croatian */
  {"hsb",	HB_TAG('U','S','B',' ')},	/* Upper Sorbian */
  {"ht",	HB_TAG('H','A','I',' ')},	/* Haitian */
  {"hu",	HB_TAG('H','U','N',' ')},	/* Hungarian */
  {"hy",	HB_TAG('H','Y','E',' ')},	/* Armenian */
  {"id",	HB_TAG('I','N','D',' ')},	/* Indonesian */
  {"ig",	HB_TAG('I','B','O',' ')},	/* Igbo */
  {"igb",	HB_TAG('E','B','I',' ')},	/* Ebira */
  {"inh",	HB_TAG('I','N','G',' ')},	/* Ingush */
  {"is",	HB_TAG('I','S','L',' ')},	/* Icelandic */
  {"it",	HB_TAG('I','T','A',' ')},	/* Italian */
  {"iu",	HB_TAG('I','N','U',' ')},	/* Inuktitut */
  {"ja",	HB_TAG('J','A','N',' ')},	/* Japanese */
  {"jv",	HB_TAG('J','A','V',' ')},	/* Javanese */
  {"ka",	HB_TAG('K','A','T',' ')},	/* Georgian */
  {"kam",	HB_TAG('K','M','B',' ')},	/* Kamba (Kenya) */
  {"kbd",	HB_TAG('K','A','B',' ')},	/* Kabardian */
  {"kdr",	HB_TAG('K','R','M',' ')},	/* Karaim */
  {"kdt",	HB_TAG('K','U','Y',' ')},	/* Kuy */
  {"kfr",	HB_TAG('K','A','C',' ')},	/* Kachchi */
  {"kfy",	HB_TAG('K','M','N',' ')},	/* Kumaoni */
  {"kha",	HB_TAG('K','S','I',' ')},	/* Khasi */
  {"khw",	HB_TAG('K','H','W',' ')},	/* Khowar */
  {"ki",	HB_TAG('K','I','K',' ')},	/* Kikuyu */
  {"kk",	HB_TAG('K','A','Z',' ')},	/* Kazakh */
  {"kl",	HB_TAG('G','R','N',' ')},	/* Kalaallisut */
  {"kln",	HB_TAG('K','A','L',' ')},	/* Kalenjin */
  {"km",	HB_TAG('K','H','M',' ')},	/* Central Khmer */
  {"kmw",	HB_TAG('K','M','O',' ')},	/* Komo (Democratic Republic of Congo) */
  {"kn",	HB_TAG('K','A','N',' ')},	/* Kannada */
  {"ko",	HB_TAG('K','O','R',' ')},	/* Korean */
  {"koi",	HB_TAG('K','O','P',' ')},	/* Komi-Permyak */
  {"kok",	HB_TAG('K','O','K',' ')},	/* Konkani */
  {"kpe",	HB_TAG('K','P','L',' ')},	/* Kpelle */
  {"kpv",	HB_TAG('K','O','Z',' ')},	/* Komi-Zyrian */
  {"kpy",	HB_TAG('K','Y','K',' ')},	/* Koryak */
  {"kqy",	HB_TAG('K','R','T',' ')},	/* Koorete */
  {"kr",	HB_TAG('K','N','R',' ')},	/* Kanuri */
  {"kri",	HB_TAG('K','R','I',' ')},	/* Krio */
  {"krl",	HB_TAG('K','R','L',' ')},	/* Karelian */
  {"kru",	HB_TAG('K','U','U',' ')},	/* Kurukh */
  {"ks",	HB_TAG('K','S','H',' ')},	/* Kashmiri */
  {"ku",	HB_TAG('K','U','R',' ')},	/* Kurdish */
  {"kum",	HB_TAG('K','U','M',' ')},	/* Kumyk */
  {"kvd",	HB_TAG('K','U','I',' ')},	/* Kui (Indonesia) */
  {"kxu",	HB_TAG('K','U','I',' ')},	/* Kui (India) */
  {"ky",	HB_TAG('K','I','R',' ')},	/* Kirghiz */
  {"la",	HB_TAG('L','A','T',' ')},	/* Latin */
  {"lad",	HB_TAG('J','U','D',' ')},	/* Ladino */
  {"lb",	HB_TAG('L','T','Z',' ')},	/* Luxembourgish */
  {"lbe",	HB_TAG('L','A','K',' ')},	/* Lak */
  {"lbj",	HB_TAG('L','D','K',' ')},	/* Ladakhi */
  {"lif",	HB_TAG('L','M','B',' ')},	/* Limbu */
  {"lld",	HB_TAG('L','A','D',' ')},	/* Ladin */
  {"ln",	HB_TAG('L','I','N',' ')},	/* Lingala */
  {"lo",	HB_TAG('L','A','O',' ')},	/* Lao */
  {"lt",	HB_TAG('L','T','H',' ')},	/* Lithuanian */
  {"luo",	HB_TAG('L','U','O',' ')},	/* Luo (Kenya and Tanzania) */
  {"luw",	HB_TAG('L','U','O',' ')},	/* Luo (Cameroon) */
  {"lv",	HB_TAG('L','V','I',' ')},	/* Latvian */
  {"lzz",	HB_TAG('L','A','Z',' ')},	/* Laz */
  {"mai",	HB_TAG('M','T','H',' ')},	/* Maithili */
  {"mdc",	HB_TAG('M','L','E',' ')},	/* Male (Papua New Guinea) */
  {"mdf",	HB_TAG('M','O','K',' ')},	/* Moksha */
  {"mdy",	HB_TAG('M','L','E',' ')},	/* Male (Ethiopia) */
  {"men",	HB_TAG('M','D','E',' ')},	/* Mende (Sierra Leone) */
  {"mg",	HB_TAG('M','L','G',' ')},	/* Malagasy */
  {"mi",	HB_TAG('M','R','I',' ')},	/* Maori */
  {"mk",	HB_TAG('M','K','D',' ')},	/* Macedonian */
  {"ml",	HB_TAG('M','L','R',' ')},	/* Malayalam */
  {"mn",	HB_TAG('M','N','G',' ')},	/* Mongolian */
  {"mnc",	HB_TAG('M','C','H',' ')},	/* Manchu */
  {"mni",	HB_TAG('M','N','I',' ')},	/* Manipuri */
  {"mnk",	HB_TAG('M','N','D',' ')},	/* Mandinka */
  {"mns",	HB_TAG('M','A','N',' ')},	/* Mansi */
  {"mnw",	HB_TAG('M','O','N',' ')},	/* Mon */
  {"mo",	HB_TAG('M','O','L',' ')},	/* Moldavian */
  {"moh",	HB_TAG('M','O','H',' ')},	/* Mohawk */
  {"mpe",	HB_TAG('M','A','J',' ')},	/* Majang */
  {"mr",	HB_TAG('M','A','R',' ')},	/* Marathi */
  {"ms",	HB_TAG('M','L','Y',' ')},	/* Malay */
  {"mt",	HB_TAG('M','T','S',' ')},	/* Maltese */
  {"mwr",	HB_TAG('M','A','W',' ')},	/* Marwari */
  {"my",	HB_TAG('B','R','M',' ')},	/* Burmese */
  {"mym",	HB_TAG('M','E','N',' ')},	/* Me'en */
  {"myv",	HB_TAG('E','R','Z',' ')},	/* Erzya */
  {"nb",	HB_TAG('N','O','R',' ')},	/* Norwegian Bokmål */
  {"nco",	HB_TAG('S','I','B',' ')},	/* Sibe */
  {"ne",	HB_TAG('N','E','P',' ')},	/* Nepali */
  {"new",	HB_TAG('N','E','W',' ')},	/* Newari */
  {"ng",	HB_TAG('N','D','G',' ')},	/* Ndonga */
  {"ngl",	HB_TAG('L','M','W',' ')},	/* Lomwe */
  {"niu",	HB_TAG('N','I','U',' ')},	/* Niuean */
  {"niv",	HB_TAG('G','I','L',' ')},	/* Gilyak */
  {"nl",	HB_TAG('N','L','D',' ')},	/* Dutch */
  {"nn",	HB_TAG('N','Y','N',' ')},	/* Norwegian Nynorsk */
  {"no",	HB_TAG('N','O','R',' ')},	/* Norwegian (deprecated) */
  {"nog",	HB_TAG('N','O','G',' ')},	/* Nogai */
  {"nqo",	HB_TAG('N','K','O',' ')},	/* N'Ko */
  {"nsk",	HB_TAG('N','A','S',' ')},	/* Naskapi */
  {"ny",	HB_TAG('C','H','I',' ')},	/* Nyanja */
  {"oc",	HB_TAG('O','C','I',' ')},	/* Occitan (post 1500) */
  {"oj",	HB_TAG('O','J','B',' ')},	/* Ojibwa */
  {"om",	HB_TAG('O','R','O',' ')},	/* Oromo */
  {"or",	HB_TAG('O','R','I',' ')},	/* Oriya */
  {"os",	HB_TAG('O','S','S',' ')},	/* Ossetian */
  {"pa",	HB_TAG('P','A','N',' ')},	/* Panjabi */
  {"pi",	HB_TAG('P','A','L',' ')},	/* Pali */
  {"pl",	HB_TAG('P','L','K',' ')},	/* Polish */
  {"plp",	HB_TAG('P','A','P',' ')},	/* Palpa */
  {"prs",	HB_TAG('D','R','I',' ')},	/* Dari */
  {"ps",	HB_TAG('P','A','S',' ')},	/* Pushto */
  {"pt",	HB_TAG('P','T','G',' ')},	/* Portuguese */
  {"raj",	HB_TAG('R','A','J',' ')},	/* Rajasthani */
  {"ria",	HB_TAG('R','I','A',' ')},	/* Riang (India) */
  {"ril",	HB_TAG('R','I','A',' ')},	/* Riang (Myanmar) */
  {"ro",	HB_TAG('R','O','M',' ')},	/* Romanian */
  {"rom",	HB_TAG('R','O','Y',' ')},	/* Romany */
  {"ru",	HB_TAG('R','U','S',' ')},	/* Russian */
  {"rue",	HB_TAG('R','S','Y',' ')},	/* Rusyn */
  {"sa",	HB_TAG('S','A','N',' ')},	/* Sanskrit */
  {"sah",	HB_TAG('Y','A','K',' ')},	/* Yakut */
  {"sat",	HB_TAG('S','A','T',' ')},	/* Santali */
  {"sck",	HB_TAG('S','A','D',' ')},	/* Sadri */
  {"sd",	HB_TAG('S','N','D',' ')},	/* Sindhi */
  {"se",	HB_TAG('N','S','M',' ')},	/* Northern Sami */
  {"seh",	HB_TAG('S','N','A',' ')},	/* Sena */
  {"sel",	HB_TAG('S','E','L',' ')},	/* Selkup */
  {"sg",	HB_TAG('S','G','O',' ')},	/* Sango */
  {"shn",	HB_TAG('S','H','N',' ')},	/* Shan */
  {"si",	HB_TAG('S','N','H',' ')},	/* Sinhala */
  {"sid",	HB_TAG('S','I','D',' ')},	/* Sidamo */
  {"sjd",	HB_TAG('K','S','M',' ')},	/* Kildin Sami */
  {"sk",	HB_TAG('S','K','Y',' ')},	/* Slovak */
  {"skr",	HB_TAG('S','R','K',' ')},	/* Seraiki */
  {"sl",	HB_TAG('S','L','V',' ')},	/* Slovenian */
  {"sm",	HB_TAG('S','M','O',' ')},	/* Samoan */
  {"sma",	HB_TAG('S','S','M',' ')},	/* Southern Sami */
  {"smj",	HB_TAG('L','S','M',' ')},	/* Lule Sami */
  {"smn",	HB_TAG('I','S','M',' ')},	/* Inari Sami */
  {"sms",	HB_TAG('S','K','S',' ')},	/* Skolt Sami */
  {"snk",	HB_TAG('S','N','K',' ')},	/* Soninke */
  {"so",	HB_TAG('S','M','L',' ')},	/* Somali */
  {"sq",	HB_TAG('S','Q','I',' ')},	/* Albanian */
  {"sr",	HB_TAG('S','R','B',' ')},	/* Serbian */
  {"srr",	HB_TAG('S','R','R',' ')},	/* Serer */
  {"suq",	HB_TAG('S','U','R',' ')},	/* Suri */
  {"sv",	HB_TAG('S','V','E',' ')},	/* Swedish */
  {"sva",	HB_TAG('S','V','A',' ')},	/* Svan */
  {"sw",	HB_TAG('S','W','K',' ')},	/* Swahili */
  {"swb",	HB_TAG('C','M','R',' ')},	/* Comorian */
  {"syr",	HB_TAG('S','Y','R',' ')},	/* Syriac */
  {"ta",	HB_TAG('T','A','M',' ')},	/* Tamil */
  {"tcy",	HB_TAG('T','U','L',' ')},	/* Tulu */
  {"te",	HB_TAG('T','E','L',' ')},	/* Telugu */
  {"tg",	HB_TAG('T','A','J',' ')},	/* Tajik */
  {"th",	HB_TAG('T','H','A',' ')},	/* Thai */
  {"ti",	HB_TAG('T','G','Y',' ')},	/* Tigrinya */
  {"tig",	HB_TAG('T','G','R',' ')},	/* Tigre */
  {"tk",	HB_TAG('T','K','M',' ')},	/* Turkmen */
  {"tn",	HB_TAG('T','N','A',' ')},	/* Tswana */
  {"tnz",	HB_TAG('T','N','G',' ')},	/* Tonga (Thailand) */
  {"to",	HB_TAG('T','N','G',' ')},	/* Tonga (Tonga Islands) */
  {"tog",	HB_TAG('T','N','G',' ')},	/* Tonga (Nyasa) */
  {"toi",	HB_TAG('T','N','G',' ')},	/* Tonga (Zambia) */
  {"tr",	HB_TAG('T','R','K',' ')},	/* Turkish */
  {"ts",	HB_TAG('T','S','G',' ')},	/* Tsonga */
  {"tt",	HB_TAG('T','A','T',' ')},	/* Tatar */
  {"tw",	HB_TAG('T','W','I',' ')},	/* Twi */
  {"ty",	HB_TAG('T','H','T',' ')},	/* Tahitian */
  {"udm",	HB_TAG('U','D','M',' ')},	/* Udmurt */
  {"ug",	HB_TAG('U','Y','G',' ')},	/* Uighur */
  {"uk",	HB_TAG('U','K','R',' ')},	/* Ukrainian */
  {"unr",	HB_TAG('M','U','N',' ')},	/* Mundari */
  {"ur",	HB_TAG('U','R','D',' ')},	/* Urdu */
  {"uz",	HB_TAG('U','Z','B',' ')},	/* Uzbek */
  {"ve",	HB_TAG('V','E','N',' ')},	/* Venda */
  {"vi",	HB_TAG('V','I','T',' ')},	/* Vietnamese */
  {"wbm",	HB_TAG('W','A',' ',' ')},	/* Wa */
  {"wbr",	HB_TAG('W','A','G',' ')},	/* Wagdi */
  {"wo",	HB_TAG('W','L','F',' ')},	/* Wolof */
  {"xal",	HB_TAG('K','L','M',' ')},	/* Kalmyk */
  {"xh",	HB_TAG('X','H','S',' ')},	/* Xhosa */
  {"xom",	HB_TAG('K','M','O',' ')},	/* Komo (Sudan) */
  {"xsl",	HB_TAG('S','S','L',' ')},	/* South Slavey */
  {"yi",	HB_TAG('J','I','I',' ')},	/* Yiddish */
  {"yo",	HB_TAG('Y','B','A',' ')},	/* Yoruba */
  {"yso",	HB_TAG('N','I','S',' ')},	/* Nisi (China) */
  {"zh-cn",	HB_TAG('Z','H','S',' ')},	/* Chinese (China) */
  {"zh-hk",	HB_TAG('Z','H','H',' ')},	/* Chinese (Hong Kong) */
  {"zh-mo",	HB_TAG('Z','H','T',' ')},	/* Chinese (Macao) */
  {"zh-sg",	HB_TAG('Z','H','S',' ')},	/* Chinese (Singapore) */
  {"zh-tw",	HB_TAG('Z','H','T',' ')},	/* Chinese (Taiwan) */
  {"zne",	HB_TAG('Z','N','D',' ')},	/* Zande */
  {"zu",	HB_TAG('Z','U','L',' ')} 	/* Zulu */

  /* I couldn't find the language id for these */

/*{"??",	HB_TAG('A','G','W',' ')},*/	/* Agaw */
/*{"??",	HB_TAG('A','L','S',' ')},*/	/* Alsatian */
/*{"??",	HB_TAG('A','L','T',' ')},*/	/* Altai */
/*{"??",	HB_TAG('A','R','K',' ')},*/	/* Arakanese */
/*{"??",	HB_TAG('A','T','H',' ')},*/	/* Athapaskan */
/*{"??",	HB_TAG('B','A','G',' ')},*/	/* Baghelkhandi */
/*{"??",	HB_TAG('B','A','L',' ')},*/	/* Balkar */
/*{"??",	HB_TAG('B','A','U',' ')},*/	/* Baule */
/*{"??",	HB_TAG('B','B','R',' ')},*/	/* Berber */
/*{"??",	HB_TAG('B','C','R',' ')},*/	/* Bible Cree */
/*{"??",	HB_TAG('B','E','L',' ')},*/	/* Belarussian */
/*{"??",	HB_TAG('B','I','L',' ')},*/	/* Bilen */
/*{"??",	HB_TAG('B','K','F',' ')},*/	/* Blackfoot */
/*{"??",	HB_TAG('B','L','N',' ')},*/	/* Balante */
/*{"??",	HB_TAG('B','M','L',' ')},*/	/* Bamileke */
/*{"??",	HB_TAG('B','R','I',' ')},*/	/* Braj Bhasha */
/*{"??",	HB_TAG('C','H','G',' ')},*/	/* Chaha Gurage */
/*{"??",	HB_TAG('C','H','H',' ')},*/	/* Chattisgarhi */
/*{"??",	HB_TAG('C','H','K',' ')},*/	/* Chukchi */
/*{"??",	HB_TAG('D','J','R',' ')},*/	/* Djerma */
/*{"??",	HB_TAG('D','N','G',' ')},*/	/* Dangme */
/*{"??",	HB_TAG('E','C','R',' ')},*/	/* Eastern Cree */
/*{"??",	HB_TAG('F','A','N',' ')},*/	/* French Antillean */
/*{"??",	HB_TAG('F','L','E',' ')},*/	/* Flemish */
/*{"??",	HB_TAG('F','N','E',' ')},*/	/* Forest Nenets */
/*{"??",	HB_TAG('F','T','A',' ')},*/	/* Futa */
/*{"??",	HB_TAG('G','A','R',' ')},*/	/* Garshuni */
/*{"??",	HB_TAG('G','E','Z',' ')},*/	/* Ge'ez */
/*{"??",	HB_TAG('H','A','L',' ')},*/	/* Halam */
/*{"??",	HB_TAG('H','A','R',' ')},*/	/* Harauti */
/*{"??",	HB_TAG('H','A','W',' ')},*/	/* Hawaiian */
/*{"??",	HB_TAG('H','B','N',' ')},*/	/* Hammer-Banna */
/*{"??",	HB_TAG('H','M','A',' ')},*/	/* High Mari */
/*{"??",	HB_TAG('H','N','D',' ')},*/	/* Hindko */
/*{"??",	HB_TAG('I','J','O',' ')},*/	/* Ijo */
/*{"??",	HB_TAG('I','L','O',' ')},*/	/* Ilokano */
/*{"??",	HB_TAG('I','R','T',' ')},*/	/* Irish Traditional */
/*{"??",	HB_TAG('J','U','L',' ')},*/	/* Jula */
/*{"??",	HB_TAG('K','A','R',' ')},*/	/* Karachay */
/*{"??",	HB_TAG('K','E','B',' ')},*/	/* Kebena */
/*{"??",	HB_TAG('K','G','E',' ')},*/	/* Khutsuri Georgian */
/*{"??",	HB_TAG('K','H','A',' ')},*/	/* Khakass */
/*{"??",	HB_TAG('K','H','K',' ')},*/	/* Khanty-Kazim */
/*{"??",	HB_TAG('K','H','S',' ')},*/	/* Khanty-Shurishkar */
/*{"??",	HB_TAG('K','H','V',' ')},*/	/* Khanty-Vakhi */
/*{"??",	HB_TAG('K','I','S',' ')},*/	/* Kisii */
/*{"??",	HB_TAG('K','K','N',' ')},*/	/* Kokni */
/*{"??",	HB_TAG('K','M','S',' ')},*/	/* Komso */
/*{"??",	HB_TAG('K','O','D',' ')},*/	/* Kodagu */
/*{"??",	HB_TAG('K','O','H',' ')},*/	/* Korean Old Hangul */
/*{"??",	HB_TAG('K','O','N',' ')},*/	/* Kikongo */
/*{"??",	HB_TAG('K','R','K',' ')},*/	/* Karakalpak */
/*{"??",	HB_TAG('K','R','N',' ')},*/	/* Karen */
/*{"??",	HB_TAG('K','U','L',' ')},*/	/* Kulvi */
/*{"??",	HB_TAG('L','A','H',' ')},*/	/* Lahuli */
/*{"??",	HB_TAG('L','A','M',' ')},*/	/* Lambani */
/*{"??",	HB_TAG('L','C','R',' ')},*/	/* L-Cree */
/*{"??",	HB_TAG('L','E','Z',' ')},*/	/* Lezgi */
/*{"??",	HB_TAG('L','M','A',' ')},*/	/* Low Mari */
/*{"??",	HB_TAG('L','U','B',' ')},*/	/* Luba */
/*{"??",	HB_TAG('L','U','G',' ')},*/	/* Luganda */
/*{"??",	HB_TAG('L','U','H',' ')},*/	/* Luhya */
/*{"??",	HB_TAG('M','A','K',' ')},*/	/* Makua */
/*{"??",	HB_TAG('M','A','L',' ')},*/	/* Malayalam Traditional */
/*{"??",	HB_TAG('M','B','N',' ')},*/	/* Mbundu */
/*{"??",	HB_TAG('M','I','Z',' ')},*/	/* Mizo */
/*{"??",	HB_TAG('M','L','N',' ')},*/	/* Malinke */
/*{"??",	HB_TAG('M','N','K',' ')},*/	/* Maninka */
/*{"??",	HB_TAG('M','O','R',' ')},*/	/* Moroccan */
/*{"??",	HB_TAG('N','A','G',' ')},*/	/* Naga-Assamese */
/*{"??",	HB_TAG('N','C','R',' ')},*/	/* N-Cree */
/*{"??",	HB_TAG('N','D','B',' ')},*/	/* Ndebele */
/*{"??",	HB_TAG('N','G','R',' ')},*/	/* Nagari */
/*{"??",	HB_TAG('N','H','C',' ')},*/	/* Norway House Cree */
/*{"??",	HB_TAG('N','K','L',' ')},*/	/* Nkole */
/*{"??",	HB_TAG('N','T','A',' ')},*/	/* Northern Tai */
/*{"??",	HB_TAG('O','C','R',' ')},*/	/* Oji-Cree */
/*{"??",	HB_TAG('P','A','A',' ')},*/	/* Palestinian Aramaic */
/*{"??",	HB_TAG('P','G','R',' ')},*/	/* Polytonic Greek */
/*{"??",	HB_TAG('P','L','G',' ')},*/	/* Palaung */
/*{"??",	HB_TAG('Q','I','N',' ')},*/	/* Chin */
/*{"??",	HB_TAG('R','B','U',' ')},*/	/* Russian Buriat */
/*{"??",	HB_TAG('R','C','R',' ')},*/	/* R-Cree */
/*{"??",	HB_TAG('R','M','S',' ')},*/	/* Rhaeto-Romanic */
/*{"??",	HB_TAG('R','U','A',' ')},*/	/* Ruanda */
/*{"??",	HB_TAG('S','A','Y',' ')},*/	/* Sayisi */
/*{"??",	HB_TAG('S','E','K',' ')},*/	/* Sekota */
/*{"??",	HB_TAG('S','I','G',' ')},*/	/* Silte Gurage */
/*{"??",	HB_TAG('S','L','A',' ')},*/	/* Slavey */
/*{"??",	HB_TAG('S','O','G',' ')},*/	/* Sodo Gurage */
/*{"??",	HB_TAG('S','O','T',' ')},*/	/* Sotho */
/*{"??",	HB_TAG('S','W','A',' ')},*/	/* Swadaya Aramaic */
/*{"??",	HB_TAG('S','W','Z',' ')},*/	/* Swazi */
/*{"??",	HB_TAG('S','X','T',' ')},*/	/* Sutu */
/*{"??",	HB_TAG('T','A','B',' ')},*/	/* Tabasaran */
/*{"??",	HB_TAG('T','C','R',' ')},*/	/* TH-Cree */
/*{"??",	HB_TAG('T','G','N',' ')},*/	/* Tongan */
/*{"??",	HB_TAG('T','M','N',' ')},*/	/* Temne */
/*{"??",	HB_TAG('T','N','E',' ')},*/	/* Tundra Nenets */
/*{"??",	HB_TAG('T','O','D',' ')},*/	/* Todo */
/*{"??",	HB_TAG('T','U','A',' ')},*/	/* Turoyo Aramaic */
/*{"??",	HB_TAG('T','U','V',' ')},*/	/* Tuvin */
/*{"??",	HB_TAG('W','C','R',' ')},*/	/* West-Cree */
/*{"??",	HB_TAG('X','B','D',' ')},*/	/* Tai Lue */
/*{"??",	HB_TAG('Y','C','R',' ')},*/	/* Y-Cree */
/*{"??",	HB_TAG('Y','I','C',' ')},*/	/* Yi Classic */
/*{"??",	HB_TAG('Y','I','M',' ')},*/	/* Yi Modern */
/*{"??",	HB_TAG('Z','H','P',' ')},*/	/* Chinese Phonetic */
};
573
/*
 * Compares the first components of two language strings, where the first
 * component is everything before the first '-' (or the whole string when
 * there is none).
 *
 * The comparison runs over the longer of the two first-component lengths,
 * so a short component never compares equal to a longer one it happens to
 * be a prefix of ("en" vs "eng" is non-zero).  Returns a strncmp()-style
 * result: zero when the first components are the same.
 *
 * Also used as the bsearch() comparator over the language table, where @b
 * points at a table row whose leading member is the language string.
 */
static int
lang_compare_first_component (const char *a,
			      const char *b)
{
  const char *dash;
  size_t len_a, len_b;

  dash = strchr (a, '-');
  len_a = dash ? (size_t) (dash - a) : strlen (a);

  dash = strchr (b, '-');
  len_b = dash ? (size_t) (dash - b) : strlen (b);

  return strncmp (a, b, len_a > len_b ? len_a : len_b);
}
589
590static hb_bool_t
591lang_matches (const char *lang_str, const char *spec)
592{
593  unsigned int len = strlen (spec);
594
595  return lang_str && strncmp (lang_str, spec, len) == 0 &&
596	 (lang_str[len] == '\0' || lang_str[len] == '-');
597}
598
599hb_tag_t
600hb_ot_tag_from_language (hb_language_t language)
601{
602  const char *lang_str;
603  LangTag *lang_tag;
604
605  if (language == NULL)
606    return HB_OT_TAG_DEFAULT_LANGUAGE;
607
608  lang_str = hb_language_to_string (language);
609
610  if (0 == strncmp (lang_str, "x-hbot", 6)) {
611    char tag[4];
612    int i;
613    lang_str += 6;
614    for (i = 0; i < 4 && ISALPHA (lang_str[i]); i++)
615      tag[i] = TOUPPER (lang_str[i]);
616    for (; i < 4; i++)
617      tag[i] = ' ';
618    return HB_TAG_CHAR4 (tag);
619  }
620
621  /* find a language matching in the first component */
622  lang_tag = bsearch (lang_str, ot_languages,
623		      ARRAY_LENGTH (ot_languages), sizeof (LangTag),
624		      (hb_compare_func_t) lang_compare_first_component);
625
626  /* we now need to find the best language matching */
627  if (lang_tag)
628  {
629    hb_bool_t found = FALSE;
630
631    /* go to the final one matching in the first component */
632    while (lang_tag + 1 < ot_languages + ARRAY_LENGTH (ot_languages) &&
633	   lang_compare_first_component (lang_str, (lang_tag + 1)->language) == 0)
634      lang_tag++;
635
636    /* go back, find which one matches completely */
637    while (lang_tag >= ot_languages &&
638	   lang_compare_first_component (lang_str, lang_tag->language) == 0)
639    {
640      if (lang_matches (lang_str, lang_tag->language)) {
641	found = TRUE;
642	break;
643      }
644
645      lang_tag--;
646    }
647
648    if (!found)
649      lang_tag = NULL;
650  }
651
652  if (lang_tag)
653    return lang_tag->tag;
654
655  return HB_OT_TAG_DEFAULT_LANGUAGE;
656}
657
658hb_language_t
659hb_ot_tag_to_language (hb_tag_t tag)
660{
661  unsigned int i;
662  unsigned char buf[11] = "x-hbot";
663
664  for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
665    if (ot_languages[i].tag == tag)
666      return hb_language_from_string (ot_languages[i].language);
667
668  buf[6] = tag >> 24;
669  buf[7] = (tag >> 16) & 0xFF;
670  buf[8] = (tag >> 8) & 0xFF;
671  buf[9] = tag & 0xFF;
672  buf[10] = '\0';
673  return hb_language_from_string ((char *) buf);
674}
675
676
677HB_END_DECLS
678