1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 **********************************************************************
5 *   Copyright (C) 1996-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 *       intended for internal use only.
14 *
15 * Methods of interest:
16 *   unsigned long convertToLCID(const char*);
17 *   const char* convertToPosix(unsigned long);
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 *  Date        Name        Description
22 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
23 *                          setId() method and safety check against
24 *                          MAX_ID_LENGTH.
25 * 04/23/99     stephen     Added C wrapper for convertToPosix.
26 * 09/18/00     george      Removed the memory leaks.
27 * 08/23/01     george      Convert to C
28 */
29
30#include "locmap.h"
31#include "cstring.h"
32#include "cmemory.h"
33#include "unicode/uloc.h"
34
35#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
36/*
37 * TODO: It seems like we should widen this to
38 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
39 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
40 * but those use gcc and won't have defined(_MSC_VER).
41 * We might need to #include some Windows header and test for some version macro from there.
42 * Or call some Windows function and see what it returns.
43 */
44#define USE_WINDOWS_LCID_MAPPING_API
45#include <windows.h>
46#include <winnls.h>
47#endif
48
49/*
50 * Note:
51 * The mapping from Win32 locale ID numbers to POSIX locale strings should
52 * be the faster one.
53 *
54 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
55 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
56 */
57
58/*
59////////////////////////////////////////////////
60//
61// Internal Classes for LCID <--> POSIX Mapping
62//
63/////////////////////////////////////////////////
64*/
65
/**
 * One row of an LCID <--> POSIX table: pairs a host (Windows) locale ID
 * with the POSIX-style locale string it corresponds to.
 * Both members are const, so rows can only be set by initialization.
 */
typedef struct ILcidPosixElement {
    const uint32_t hostID;        /* LCID in host format, e.g. 0x0409 */
    const char *const posixID;    /* matching POSIX ID, e.g. "en_US" */
} ILcidPosixElement;
71
/**
 * Descriptor for one table of ILcidPosixElement rows: the number of rows
 * and a pointer to the first row. Both members are const.
 */
typedef struct ILcidPosixMap {
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement *const regionMaps;   /* first row of the table */
} ILcidPosixMap;
77
78
79/*
80/////////////////////////////////////////////////
81//
82// Easy macros to make the LCID <--> POSIX Mapping
83//
84/////////////////////////////////////////////////
85*/
86
/**
 * The standard one language/one country mapping for LCID.
 * Defines the table locmap_<languageID> with exactly two rows: the
 * language-only (parent) row first, then the language_TERRITORY row.
 * The expansion supplies its own terminating semicolon, so invocations are
 * written without one.
 *
 * hostID is parenthesized wherever it is substituted so that an expression
 * argument (e.g. 0x36 | 0x0400) is passed through as a single operand.
 * NOTE(review): LANGUAGE_LCID is defined outside this file section; it is
 * presumed to reduce an LCID to its language part - confirm in the header.
 *
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'; also pasted
 *        onto "locmap_" to form the table name
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID((hostID)), #languageID},   /* parent locale */ \
    {(hostID), #posixID}, \
};
100
/**
 * Open the definition of a hand-written subtable named locmap_<id>.
 * The expansion stops after the '=' so the caller supplies the brace
 * initializer and the terminating semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_##id[] =
107
108
/**
 * Build one ILcidPosixMap initializer {row count, table} for the table
 * locmap_<tableID>. The argument must therefore be the same identifier that
 * was used to declare the table (usually the language, e.g. de), and the
 * table's first row is expected to be the language-only entry.
 * @param tableID suffix of the locmap_ table this entry refers to.
 */
#define ILCID_POSIX_MAP(tableID) \
    { UPRV_LENGTHOF(locmap_##tableID), locmap_##tableID }
117
118/*
119////////////////////////////////////////////
120//
121// Create the table of LCID to POSIX Mapping
122// None of it should be dynamically created.
123//
124// Keep static locale variables inside the function so that
125// it can be created properly during static init.
126//
127// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
128//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
129//
130//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
132//       Update: Windows 7 (091130)
133//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
135//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
136//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
137//       to support other keywords in this mapping data, we must update the implementation.
138////////////////////////////////////////////
139*/
140
141// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as
142// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
143
/* Defines locmap_af: the language-only row "af", then {0x0436, "af_ZA"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
145
146ILCID_POSIX_SUBTABLE(ar) {
147    {0x01,   "ar"},
148    {0x3801, "ar_AE"},
149    {0x3c01, "ar_BH"},
150    {0x1401, "ar_DZ"},
151    {0x0c01, "ar_EG"},
152    {0x0801, "ar_IQ"},
153    {0x2c01, "ar_JO"},
154    {0x3401, "ar_KW"},
155    {0x3001, "ar_LB"},
156    {0x1001, "ar_LY"},
157    {0x1801, "ar_MA"},
158    {0x1801, "ar_MO"},
159    {0x2001, "ar_OM"},
160    {0x4001, "ar_QA"},
161    {0x0401, "ar_SA"},
162    {0x2801, "ar_SY"},
163    {0x1c01, "ar_TN"},
164    {0x2401, "ar_YE"}
165};
166
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_as, locmap_am and locmap_arn. */
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
170
171ILCID_POSIX_SUBTABLE(az) {
172    {0x2c,   "az"},
173    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
174    {0x742c, "az_Cyrl"},  /* Cyrillic based */
175    {0x042c, "az_Latn_AZ"}, /* Latin based */
176    {0x782c, "az_Latn"}, /* Latin based */
177    {0x042c, "az_AZ"} /* Latin based */
178};
179
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_ba and locmap_be. */
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
182
183/*ILCID_POSIX_SUBTABLE(ber) {
184    {0x5f,   "ber"},
185    {0x045f, "ber_Arab_DZ"},
186    {0x045f, "ber_Arab"},
187    {0x085f, "ber_Latn_DZ"},
188    {0x085f, "ber_Latn"}
189};*/
190
/* Defines locmap_bg: the language-only row "bg", then {0x0402, "bg_BG"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
192
193ILCID_POSIX_SUBTABLE(bin) {
194    {0x66, "bin"},
195    {0x0466, "bin_NG"}
196};
197
198ILCID_POSIX_SUBTABLE(bn) {
199    {0x45,   "bn"},
200    {0x0845, "bn_BD"},
201    {0x0445, "bn_IN"}
202};
203
204ILCID_POSIX_SUBTABLE(bo) {
205    {0x51,   "bo"},
206    {0x0851, "bo_BT"},
207    {0x0451, "bo_CN"},
208    {0x0c51, "dz_BT"}
209};
210
/* Defines locmap_br: the language-only row "br", then {0x047e, "br_FR"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
212
213ILCID_POSIX_SUBTABLE(ca) {
214    {0x03,   "ca"},
215    {0x0403, "ca_ES"},
216    {0x0803, "ca_ES_VALENCIA"}
217};
218
/* Defines locmap_co: the language-only row "co", then {0x0483, "co_FR"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
220
221ILCID_POSIX_SUBTABLE(chr) {
222    {0x05c,  "chr"},
223    {0x7c5c, "chr_Cher"},
224    {0x045c, "chr_Cher_US"},
225    {0x045c, "chr_US"}
226};
227
228// ICU has chosen different names for these.
229ILCID_POSIX_SUBTABLE(ckb) {
230    {0x92,   "ckb"},
231    {0x7c92, "ckb_Arab"},
232    {0x0492, "ckb_Arab_IQ"}
233};
234
/*
 * z/OS defines cs as a function. The table below is declared as locmap_cs
 * (the macro pastes the language onto "locmap_"), not as cs or cs_CZ, so the
 * bare identifier cs is never declared here.
 */
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)

/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_cy and locmap_da. */
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
240
241// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
242ILCID_POSIX_SUBTABLE(de) {
243    {0x07,   "de"},
244    {0x0c07, "de_AT"},
245    {0x0807, "de_CH"},
246    {0x0407, "de_DE"},
247    {0x1407, "de_LI"},
248    {0x1007, "de_LU"},
249    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
250    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
251};
252
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_dv and locmap_el. */
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
255
256// Windows uses an empty string for 'invariant'
257ILCID_POSIX_SUBTABLE(en) {
258    {0x09,   "en"},
259    {0x0c09, "en_AU"},
260    {0x2809, "en_BZ"},
261    {0x1009, "en_CA"},
262    {0x0809, "en_GB"},
263    {0x3c09, "en_HK"},
264    {0x3809, "en_ID"},
265    {0x1809, "en_IE"},
266    {0x4009, "en_IN"},
267    {0x2009, "en_JM"},
268    {0x4409, "en_MY"},
269    {0x1409, "en_NZ"},
270    {0x3409, "en_PH"},
271    {0x4809, "en_SG"},
272    {0x2C09, "en_TT"},
273    {0x0409, "en_US"},
274    {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
275    {0x2409, "en_029"},
276    {0x1c09, "en_ZA"},
277    {0x3009, "en_ZW"},
278    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
279    {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
280    {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
281    {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
282    {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
283    {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
284};
285
286ILCID_POSIX_SUBTABLE(en_US_POSIX) {
287    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
288};
289
290// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
291ILCID_POSIX_SUBTABLE(es) {
292    {0x0a,   "es"},
293    {0x2c0a, "es_AR"},
294    {0x400a, "es_BO"},
295    {0x340a, "es_CL"},
296    {0x240a, "es_CO"},
297    {0x140a, "es_CR"},
298    {0x5c0a, "es_CU"},
299    {0x1c0a, "es_DO"},
300    {0x300a, "es_EC"},
301    {0x0c0a, "es_ES"},      /*Modern sort.*/
302    {0x100a, "es_GT"},
303    {0x480a, "es_HN"},
304    {0x080a, "es_MX"},
305    {0x4c0a, "es_NI"},
306    {0x180a, "es_PA"},
307    {0x280a, "es_PE"},
308    {0x500a, "es_PR"},
309    {0x3c0a, "es_PY"},
310    {0x440a, "es_SV"},
311    {0x540a, "es_US"},
312    {0x380a, "es_UY"},
313    {0x200a, "es_VE"},
314    {0x580a, "es_419"},
315    {0x040a, "es_ES@collation=traditional"},
316    {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
317};
318
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_et and locmap_eu. */
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
321
322/* ISO-639 doesn't distinguish between Persian and Dari.*/
323ILCID_POSIX_SUBTABLE(fa) {
324    {0x29,   "fa"},
325    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
326    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
327};
328
329
330/* duplicate for roundtripping */
331ILCID_POSIX_SUBTABLE(fa_AF) {
332    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
333    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
334};
335
336ILCID_POSIX_SUBTABLE(ff) {
337    {0x67,   "ff"},
338    {0x7c67, "ff_Latn"},
339    {0x0867, "ff_Latn_SN"},
340    {0x0467, "ff_NG"}
341};
342
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_fi, locmap_fil and locmap_fo. */
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
346
347ILCID_POSIX_SUBTABLE(fr) {
348    {0x0c,   "fr"},
349    {0x080c, "fr_BE"},
350    {0x0c0c, "fr_CA"},
351    {0x240c, "fr_CD"},
352    {0x240c, "fr_CG"},
353    {0x100c, "fr_CH"},
354    {0x300c, "fr_CI"},
355    {0x2c0c, "fr_CM"},
356    {0x040c, "fr_FR"},
357    {0x3c0c, "fr_HT"},
358    {0x140c, "fr_LU"},
359    {0x380c, "fr_MA"},
360    {0x180c, "fr_MC"},
361    {0x340c, "fr_ML"},
362    {0x200c, "fr_RE"},
363    {0x280c, "fr_SN"},
364    {0xe40c, "fr_015"},
365    {0x1c0c, "fr_029"}
366};
367
/* Defines locmap_fuv. Its LCID 0x0467 is the same value the ff table uses for ff_NG. */
ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)

/* Defines locmap_fy: the language-only row "fy", then {0x0462, "fy_NL"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
371
372ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
373    {0x3c,   "ga"},
374    {0x083c, "ga_IE"},
375    {0x043c, "gd_GB"}
376};
377
378ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
379    {0x91,   "gd"},
380    {0x0491, "gd_GB"}
381};
382
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_gl, locmap_gu, locmap_gn and locmap_gsw. */
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
387
388ILCID_POSIX_SUBTABLE(ha) {
389    {0x68,   "ha"},
390    {0x7c68, "ha_Latn"},
391    {0x0468, "ha_Latn_NG"},
392};
393
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_haw, locmap_he and locmap_hi. */
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
397
398/* This LCID is really four different locales.*/
399ILCID_POSIX_SUBTABLE(hr) {
400    {0x1a,   "hr"},
401    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
402    {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
403    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
404    {0x781a, "bs"},     /* Bosnian */
405    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
406    {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
407    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
408    {0x041a, "hr_HR"},  /* Croatian*/
409    {0x2c1a, "sr_Latn_ME"},
410    {0x241a, "sr_Latn_RS"},
411    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
412    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
413    {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
414    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
415    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
416    {0x301a, "sr_Cyrl_ME"},
417    {0x281a, "sr_Cyrl_RS"},
418    {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
419    {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
420};
421
422ILCID_POSIX_SUBTABLE(hsb) {
423    {0x2E,   "hsb"},
424    {0x042E, "hsb_DE"},
425    {0x082E, "dsb_DE"},
426    {0x7C2E, "dsb"},
427};
428
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_hu and locmap_hy. */
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
431
432ILCID_POSIX_SUBTABLE(ibb) {
433    {0x69, "ibb"},
434    {0x0469, "ibb_NG"}
435};
436
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_id, locmap_ig, locmap_ii and locmap_is. */
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
441
442ILCID_POSIX_SUBTABLE(it) {
443    {0x10,   "it"},
444    {0x0810, "it_CH"},
445    {0x0410, "it_IT"}
446};
447
448ILCID_POSIX_SUBTABLE(iu) {
449    {0x5d,   "iu"},
450    {0x045d, "iu_Cans_CA"},
451    {0x785d, "iu_Cans"},
452    {0x085d, "iu_Latn_CA"},
453    {0x7c5d, "iu_Latn"}
454};
455
/* Two-row tables (language-only row, then language_TERRITORY row).
 * locmap_iw uses the same LCID (0x040d) as the he table defined earlier. */
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
463
464ILCID_POSIX_SUBTABLE(ko) {
465    {0x12,   "ko"},
466    {0x0812, "ko_KP"},
467    {0x0412, "ko_KR"}
468};
469
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_kok and locmap_kr. */
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
472
473ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
474    {0x60,   "ks"},
475    {0x0460, "ks_Arab_IN"},
476    {0x0860, "ks_Deva_IN"}
477};
478
/* Defines locmap_ky: the language-only row "ky", then {0x0440, "ky_KG"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
480
481ILCID_POSIX_SUBTABLE(la) {
482    {0x76,   "la"},
483    {0x0476, "la_001"},
484    {0x0476, "la_IT"}       /*Left in for compatibility*/
485};
486
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_lb, locmap_lo, locmap_lt, locmap_lv, locmap_mi, locmap_mk, locmap_ml. */
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
494
495ILCID_POSIX_SUBTABLE(mn) {
496    {0x50,   "mn"},
497    {0x0450, "mn_MN"},
498    {0x7c50, "mn_Mong"},
499    {0x0850, "mn_Mong_CN"},
500    {0x0850, "mn_CN"},
501    {0x7850, "mn_Cyrl"},
502    {0x0c50, "mn_Mong_MN"}
503};
504
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_mni, locmap_moh and locmap_mr. */
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
508
509ILCID_POSIX_SUBTABLE(ms) {
510    {0x3e,   "ms"},
511    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
512    {0x043e, "ms_MY"}    /* Malaysia*/
513};
514
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_mt and locmap_my. */
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
517
518ILCID_POSIX_SUBTABLE(ne) {
519    {0x61,   "ne"},
520    {0x0861, "ne_IN"},   /* India*/
521    {0x0461, "ne_NP"}    /* Nepal*/
522};
523
524ILCID_POSIX_SUBTABLE(nl) {
525    {0x13,   "nl"},
526    {0x0813, "nl_BE"},
527    {0x0413, "nl_NL"}
528};
529
530/* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
531// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
532ILCID_POSIX_SUBTABLE(no) {
533    {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
534    {0x7c14, "nb"},     /* really nb */
535    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
536    {0x0414, "no_NO"},  /* really nb_NO */
537    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
538    {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
539    {0x0814, "no_NO_NY"}/* really nn_NO */
540};
541
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_nso and locmap_oc. */
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
544
545ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
546    {0x72,   "om"},
547    {0x0472, "om_ET"},
548    {0x0472, "gaz_ET"}
549};
550
551/* Declared as or_IN to get around compiler errors*/
552ILCID_POSIX_SUBTABLE(or_IN) {
553    {0x48,   "or"},
554    {0x0448, "or_IN"},
555};
556
557ILCID_POSIX_SUBTABLE(pa) {
558    {0x46,   "pa"},
559    {0x0446, "pa_IN"},
560    {0x0846, "pa_Arab_PK"},
561    {0x0846, "pa_PK"}
562};
563
564ILCID_POSIX_SUBTABLE(pap) {
565    {0x79, "pap"},
566    {0x0479, "pap_029"},
567    {0x0479, "pap_AN"}     /*Left in for compatibility*/
568};
569
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_pl and locmap_ps. */
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
572
573ILCID_POSIX_SUBTABLE(pt) {
574    {0x16,   "pt"},
575    {0x0416, "pt_BR"},
576    {0x0816, "pt_PT"}
577};
578
579ILCID_POSIX_SUBTABLE(qu) {
580    {0x6b,   "qu"},
581    {0x046b, "qu_BO"},
582    {0x086b, "qu_EC"},
583    {0x0C6b, "qu_PE"},
584    {0x046b, "quz_BO"},
585    {0x086b, "quz_EC"},
586    {0x0C6b, "quz_PE"}
587};
588
589ILCID_POSIX_SUBTABLE(quc) {
590    {0x93,   "quc"},
591    {0x0493, "quc_CO"},
592    /*
593        "quc_Latn_GT" is an exceptional case. Language ID of "quc"
594        is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
595        under the group of "qut". "qut" is a retired ISO 639-3 language
596        code for West Central Quiche, and merged to "quc".
597        It looks Windows previously reserved "qut" for K'iche', but,
598        decided to use "quc" when adding a locale for K'iche' (Guatemala).
599
600        This data structure used here assumes language ID bits in
601        LCID is unique for alphabetic language code. But this is not true
602        for "quc_Latn_GT". If we don't have the data below, LCID look up
603        by alphabetic locale ID (POSIX) will fail. The same entry is found
604        under "qut" below, which is required for reverse look up.
605    */
606    {0x0486, "quc_Latn_GT"}
607};
608
609ILCID_POSIX_SUBTABLE(qut) {
610    {0x86,   "qut"},
611    {0x0486, "qut_GT"},
612    /*
613        See the note in "quc" above.
614    */
615    {0x0486, "quc_Latn_GT"}
616};
617
/* Defines locmap_rm: the language-only row "rm", then {0x0417, "rm_CH"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
619
620ILCID_POSIX_SUBTABLE(ro) {
621    {0x18,   "ro"},
622    {0x0418, "ro_RO"},
623    {0x0818, "ro_MD"}
624};
625
626// TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
627// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
628// (Except that it's not invariant in ICU)
629ILCID_POSIX_SUBTABLE(root) {
630    {0x00,   "root"}
631};
632
633ILCID_POSIX_SUBTABLE(ru) {
634    {0x19,   "ru"},
635    {0x0419, "ru_RU"},
636    {0x0819, "ru_MD"}
637};
638
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_rw, locmap_sa and locmap_sah. */
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
642
643ILCID_POSIX_SUBTABLE(sd) {
644    {0x59,   "sd"},
645    {0x0459, "sd_Deva_IN"},
646    {0x0459, "sd_IN"},
647    {0x0859, "sd_Arab_PK"},
648    {0x0859, "sd_PK"},
649    {0x7c59, "sd_Arab"}
650};
651
652ILCID_POSIX_SUBTABLE(se) {
653    {0x3b,   "se"},
654    {0x0c3b, "se_FI"},
655    {0x043b, "se_NO"},
656    {0x083b, "se_SE"},
657    {0x783b, "sma"},
658    {0x183b, "sma_NO"},
659    {0x1c3b, "sma_SE"},
660    {0x7c3b, "smj"},
661    {0x703b, "smn"},
662    {0x743b, "sms"},
663    {0x103b, "smj_NO"},
664    {0x143b, "smj_SE"},
665    {0x243b, "smn_FI"},
666    {0x203b, "sms_FI"},
667};
668
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_si, locmap_sk and locmap_sl. */
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
672
673ILCID_POSIX_SUBTABLE(so) {
674    {0x77,   "so"},
675    {0x0477, "so_SO"}
676};
677
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_sq and locmap_st. */
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
680
681ILCID_POSIX_SUBTABLE(sv) {
682    {0x1d,   "sv"},
683    {0x081d, "sv_FI"},
684    {0x041d, "sv_SE"}
685};
686
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_sw and locmap_syr. */
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
689
690ILCID_POSIX_SUBTABLE(ta) {
691    {0x49,   "ta"},
692    {0x0449, "ta_IN"},
693    {0x0849, "ta_LK"}
694};
695
/* Defines locmap_te: the language-only row "te", then {0x044a, "te_IN"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
697
698/* Cyrillic based by default */
699ILCID_POSIX_SUBTABLE(tg) {
700    {0x28,   "tg"},
701    {0x7c28, "tg_Cyrl"},
702    {0x0428, "tg_Cyrl_TJ"}
703};
704
/* Defines locmap_th: the language-only row "th", then {0x041e, "th_TH"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
706
707ILCID_POSIX_SUBTABLE(ti) {
708    {0x73,   "ti"},
709    {0x0873, "ti_ER"},
710    {0x0473, "ti_ET"}
711};
712
/* Defines locmap_tk: the language-only row "tk", then {0x0442, "tk_TM"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
714
715ILCID_POSIX_SUBTABLE(tn) {
716    {0x32,   "tn"},
717    {0x0832, "tn_BW"},
718    {0x0432, "tn_ZA"}
719};
720
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_tr, locmap_ts and locmap_tt. */
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
724
725ILCID_POSIX_SUBTABLE(tzm) {
726    {0x5f,   "tzm"},
727    {0x7c5f, "tzm_Latn"},
728    {0x085f, "tzm_Latn_DZ"},
729    {0x105f, "tzm_Tfng_MA"},
730    {0x045f, "tzm_Arab_MA"},
731    {0x045f, "tmz"}
732};
733
734ILCID_POSIX_SUBTABLE(ug) {
735    {0x80,   "ug"},
736    {0x0480, "ug_CN"},
737    {0x0480, "ug_Arab_CN"}
738};
739
/* Defines locmap_uk: the language-only row "uk", then {0x0422, "uk_UA"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
741
742ILCID_POSIX_SUBTABLE(ur) {
743    {0x20,   "ur"},
744    {0x0820, "ur_IN"},
745    {0x0420, "ur_PK"}
746};
747
748ILCID_POSIX_SUBTABLE(uz) {
749    {0x43,   "uz"},
750    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
751    {0x7843, "uz_Cyrl"},  /* Cyrillic based */
752    {0x0843, "uz_UZ"},  /* Cyrillic based */
753    {0x0443, "uz_Latn_UZ"}, /* Latin based */
754    {0x7c43, "uz_Latn"} /* Latin based */
755};
756
757ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
758    {0x33,   "ve"},
759    {0x0433, "ve_ZA"},
760    {0x0433, "ven_ZA"}
761};
762
/* Two-row tables (language-only row, then language_TERRITORY row):
 * locmap_vi, locmap_wo and locmap_xh. */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
766
767ILCID_POSIX_SUBTABLE(yi) {
768    {0x003d, "yi"},
769    {0x043d, "yi_001"}
770};
771
/* Defines locmap_yo: the language-only row "yo", then {0x046a, "yo_NG"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
773
774// Windows & ICU tend to different names for some of these
775// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
776ILCID_POSIX_SUBTABLE(zh) {
777    {0x0004, "zh_Hans"},
778    {0x7804, "zh"},
779    {0x0804, "zh_CN"},
780    {0x0804, "zh_Hans_CN"},
781    {0x0c04, "zh_Hant_HK"},
782    {0x0c04, "zh_HK"},
783    {0x1404, "zh_Hant_MO"},
784    {0x1404, "zh_MO"},
785    {0x1004, "zh_Hans_SG"},
786    {0x1004, "zh_SG"},
787    {0x0404, "zh_Hant_TW"},
788    {0x7c04, "zh_Hant"},
789    {0x0404, "zh_TW"},
790    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
791    {0x30404,"zh_TW"},          /* Bopomofo order */
792    {0x20004,"zh@collation=stroke"},
793    {0x20404,"zh_Hant@collation=stroke"},
794    {0x20404,"zh_Hant_TW@collation=stroke"},
795    {0x20404,"zh_TW@collation=stroke"},
796    {0x20804,"zh_Hans@collation=stroke"},
797    {0x20804,"zh_Hans_CN@collation=stroke"},
798    {0x20804,"zh_CN@collation=stroke"}
799    // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
800};
801
/* Defines locmap_zu: the language-only row "zu", then {0x0435, "zu_ZA"}. */
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
803
804/* This must be static and grouped by LCID. */
805static const ILcidPosixMap gPosixIDmap[] = {
806    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
807    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
808    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
809    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
810    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
811    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
812    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
813    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
814/*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
815    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
816    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
817    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
818    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
819    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
820    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
821    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
822    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
823    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
824    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
825    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
826    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
827    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
828    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
829    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
830    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
831    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
832    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
833    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
834    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
835    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
836    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
837    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
838    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
839    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
840    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
841    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
842    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
843    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
844    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
845    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
846    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
847    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
848    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
849    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
850    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
851    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
852    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
853    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
854    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
855    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
856    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
857    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
858    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
859    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
860    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
861    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
862    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
863    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
864    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
865    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
866    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
867    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
868    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
869    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
870    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
871    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
872    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
873    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
874    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
875    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
876    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
877    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
878    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
879    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
880    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
881    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
882    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
883    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
884    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
885    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
886    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
887    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
888    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
889    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
890    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
891    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
892/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
893    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
894    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
895/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
896    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
897    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
898    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
899    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
900    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
901    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
902    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
903    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
904    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
905    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
906    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
907    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
908    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
909    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
910    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
911    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
912    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
913    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
914    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
915    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
916    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
917    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
918/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
919    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
920    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
921    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
922    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
923    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
924/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
925    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
926    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
927    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
928    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
929    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
930    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
931    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
932    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
933    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
934    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
935    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
936    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
937    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
938    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
939    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
940    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
941    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
942    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
943    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
944    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
945    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
946    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
947    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
948    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
949    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
950    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
951    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
952};
953
/* Number of entries in gPosixIDmap; used as the exclusive upper bound when scanning that table. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
955
/**
 * Measures how many leading characters two NUL-terminated IDs share.
 * File-local helper used by getHostID.
 *
 * @param id1 first ID string.
 * @param id2 second ID string.
 * @return the length of the common prefix of id1 and id2; 0 when the first
 *         characters already differ or id1 is empty.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t shared = 0;

    /* Stop at the end of id1 or at the first mismatch (which also covers the end of id2). */
    for (; id1[shared] != 0 && id1[shared] == id2[shared]; shared++) {
    }
    return shared;
}
972
973/**
974 * Searches for a Windows LCID
975 *
976 * @param posixid the Posix style locale id.
977 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
978 *               no equivalent Windows LCID.
979 * @return the LCID
980 */
981static uint32_t
982getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
983{
984    int32_t bestIdx = 0;
985    int32_t bestIdxDiff = 0;
986    int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
987    uint32_t idx;
988
989    for (idx = 0; idx < this_0->numRegions; idx++ ) {
990        int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
991        if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
992            if (posixIDlen == sameChars) {
993                /* Exact match */
994                return this_0->regionMaps[idx].hostID;
995            }
996            bestIdxDiff = sameChars;
997            bestIdx = idx;
998        }
999    }
1000    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
1001    /* We also have to make sure that sid and si and similar string subsets don't match. */
1002    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
1003        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
1004    {
1005        *status = U_USING_FALLBACK_WARNING;
1006        return this_0->regionMaps[bestIdx].hostID;
1007    }
1008
1009    /*no match found */
1010    *status = U_ILLEGAL_ARGUMENT_ERROR;
1011    return this_0->regionMaps->hostID;
1012}
1013
1014static const char*
1015getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1016{
1017    uint32_t i;
1018    for (i = 0; i <= this_0->numRegions; i++)
1019    {
1020        if (this_0->regionMaps[i].hostID == hostID)
1021        {
1022            return this_0->regionMaps[i].posixID;
1023        }
1024    }
1025
1026    /* If you get here, then no matching region was found,
1027       so return the language id with the wild card region. */
1028    return this_0->regionMaps[0].posixID;
1029}
1030
1031/*
1032//////////////////////////////////////
1033//
1034// LCID --> POSIX
1035//
1036/////////////////////////////////////
1037*/
1038#ifdef USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer must be a writable, NUL-terminated char array and len its length as
 * reported by the caller; nothing is done when len < 3.
 * The third character is removed by shifting the remainder of the string
 * (including its terminator) one position to the left. uprv_memmove is used
 * because source and destination overlap, which strcat does not permit.
 * Both arguments are evaluated more than once.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
1054
1055#endif
1056U_CAPI int32_t
1057uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1058{
1059    uint16_t langID;
1060    uint32_t localeIndex;
1061    UBool bLookup = TRUE;
1062    const char *pPosixID = NULL;
1063
1064#ifdef USE_WINDOWS_LCID_MAPPING_API
1065    char locName[LOCALE_NAME_MAX_LENGTH] = {};      // ICU name can't be longer than Windows name
1066
1067    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1068    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1069    // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1070    // use the Windows API to resolve locale ID for this specific case.
1071    if ((hostid & 0x3FF) != 0x92) {
1072        int32_t tmpLen = 0;
1073        UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH];  // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
1074
1075        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1076        tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1077        if (tmpLen > 1) {
1078            int32_t i = 0;
1079            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
1080            bLookup = FALSE;
1081            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1082            {
1083                locName[i] = (char)(windowsLocaleName[i]);
1084
1085                // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1086                // In such cases, we need special mapping data found in the hardcoded table
1087                // in this source file.
1088                if (windowsLocaleName[i] == L'_')
1089                {
1090                    // Keep the base locale, without variant
1091                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1092                    locName[i] = '\0';
1093                    tmpLen = i;
1094                    bLookup = TRUE;
1095                    break;
1096                }
1097                else if (windowsLocaleName[i] == L'-')
1098                {
1099                    // Windows names use -, ICU uses _
1100                    locName[i] = '_';
1101                }
1102                else if (windowsLocaleName[i] == L'\0')
1103                {
1104                    // No point in doing more work than necessary
1105                    break;
1106                }
1107            }
1108            // TODO: Need to understand this better, why isn't it an alias?
1109            FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1110            pPosixID = locName;
1111        }
1112    }
1113#endif // USE_WINDOWS_LCID_MAPPING_API
1114
1115    if (bLookup) {
1116        const char *pCandidate = NULL;
1117        langID = LANGUAGE_LCID(hostid);
1118
1119        for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1120            if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1121                pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1122                break;
1123            }
1124        }
1125
1126        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1127           If a match in the hardcoded table is longer than the Windows locale name without
1128           variant, we use the one as the result */
1129        if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1130            pPosixID = pCandidate;
1131        }
1132    }
1133
1134    if (pPosixID) {
1135        int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
1136        int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1137        uprv_memcpy(posixID, pPosixID, copyLen);
1138        if (resLen < posixIDCapacity) {
1139            posixID[resLen] = 0;
1140            if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1141                *status = U_ZERO_ERROR;
1142            }
1143        } else if (resLen == posixIDCapacity) {
1144            *status = U_STRING_NOT_TERMINATED_WARNING;
1145        } else {
1146            *status = U_BUFFER_OVERFLOW_ERROR;
1147        }
1148        return resLen;
1149    }
1150
1151    /* no match found */
1152    *status = U_ILLEGAL_ARGUMENT_ERROR;
1153    return -1;
1154}
1155
1156/*
1157//////////////////////////////////////
1158//
1159// POSIX --> LCID
1160// This should only be called from uloc_getLCID.
1161// The locale ID must be in canonical form.
1162//
1163/////////////////////////////////////
1164*/
1165U_CAPI uint32_t
1166uprv_convertToLCIDPlatform(const char* localeID)
1167{
1168    // The purpose of this function is to leverage native platform name->lcid
1169    // conversion functionality when available.
1170#ifdef USE_WINDOWS_LCID_MAPPING_API
1171    DWORD nameLCIDFlags = 0;
1172    UErrorCode myStatus = U_ZERO_ERROR;
1173
1174    // First check for a Windows name->LCID match, fall through to catch
1175    // ICU special cases, but Windows may know it already.
1176#if LOCALE_ALLOW_NEUTRAL_NAMES
1177    nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1178#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1179
1180    int32_t len;
1181    char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1182    char baseName[ULOC_FULLNAME_CAPACITY] = {};
1183    const char * mylocaleID = localeID;
1184
1185    // Check any for keywords.
1186    if (uprv_strchr(localeID, '@'))
1187    {
1188        len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1189        if (U_SUCCESS(myStatus) && len > 0)
1190        {
1191            // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1192            return 0;
1193        }
1194        else
1195        {
1196            // If the locale ID contains keywords other than collation, just use the base name.
1197            len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1198
1199            if (U_SUCCESS(myStatus) && len > 0)
1200            {
1201                baseName[len] = 0;
1202                mylocaleID = baseName;
1203            }
1204        }
1205    }
1206
1207    char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1208    // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
1209    (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
1210
1211    if (U_SUCCESS(myStatus))
1212    {
1213        // Need it to be UTF-16, not 8-bit
1214        wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1215        int32_t i;
1216        for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1217        {
1218            if (asciiBCP47Tag[i] == '\0')
1219            {
1220                break;
1221            }
1222            else
1223            {
1224                // Copy the character
1225                bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1226            }
1227        }
1228
1229        if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1230        {
1231            // Ensure it's null terminated
1232            bcp47Tag[i] = L'\0';
1233            LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1234            if (lcid > 0)
1235            {
1236                // Found LCID from windows, return that one, unless its completely ambiguous
1237                // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1238                // for this process.
1239                if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1240                {
1241                    return lcid;
1242                }
1243            }
1244        }
1245    }
1246#else
1247    (void)localeID; // Suppress unused variable warning.
1248#endif /* USE_WINDOWS_LCID_MAPPING_API */
1249
1250    // No found, or not implemented on platforms without native name->lcid conversion
1251    return 0;
1252}
1253
1254U_CAPI uint32_t
1255uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1256{
1257    // This function does the table lookup when native platform name->lcid conversion isn't available,
1258    // or for locales that don't follow patterns the platform expects.
1259    uint32_t   low    = 0;
1260    uint32_t   high   = gLocaleCount;
1261    uint32_t   mid;
1262    uint32_t   oldmid = 0;
1263    int32_t    compVal;
1264
1265    uint32_t   value         = 0;
1266    uint32_t   fallbackValue = (uint32_t)-1;
1267    UErrorCode myStatus;
1268    uint32_t   idx;
1269
1270    /* Check for incomplete id. */
1271    if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1272        return 0;
1273    }
1274
1275    /*Binary search for the map entry for normal cases */
1276
1277    while (high > low)  /*binary search*/{
1278
1279        mid = (high+low) >> 1; /*Finds median*/
1280
1281        if (mid == oldmid)
1282            break;
1283
1284        compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1285        if (compVal < 0){
1286            high = mid;
1287        }
1288        else if (compVal > 0){
1289            low = mid;
1290        }
1291        else /*we found it*/{
1292            return getHostID(&gPosixIDmap[mid], posixID, status);
1293        }
1294        oldmid = mid;
1295    }
1296
1297    /*
1298     * Sometimes we can't do a binary search on posixID because some LCIDs
1299     * go to different locales.  We hit one of those special cases.
1300     */
1301    for (idx = 0; idx < gLocaleCount; idx++ ) {
1302        myStatus = U_ZERO_ERROR;
1303        value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1304        if (myStatus == U_ZERO_ERROR) {
1305            return value;
1306        }
1307        else if (myStatus == U_USING_FALLBACK_WARNING) {
1308            fallbackValue = value;
1309        }
1310    }
1311
1312    if (fallbackValue != (uint32_t)-1) {
1313        *status = U_USING_FALLBACK_WARNING;
1314        return fallbackValue;
1315    }
1316
1317    /* no match found */
1318    *status = U_ILLEGAL_ARGUMENT_ERROR;
1319    return 0;   /* return international (root) */
1320}
1321