1/*
2**********************************************************************
3*   Copyright (C) 1997-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   04/01/97    aliu        Creation.
13*   08/21/98    stephen     JDK 1.2 sync
14*   12/08/98    rtg         New Locale implementation and C API
15*   03/15/99    damiba      overhaul.
16*   04/06/99    stephen     changed setDefault() to realloc and copy
17*   06/14/99    stephen     Changed calls to ures_open for new params
18*   07/21/99    stephen     Modified setDefault() to propagate to C++
19*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
20*                           brought canonicalization code into line with spec
21*****************************************************************************/
22
23/*
24   POSIX's locale format, from putil.c: [no spaces]
25
26     ll [ _CC ] [ . MM ] [ @ VV]
27
28     l = lang, C = ctry, M = charmap, V = variant
29*/
30
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
35#include "putilimp.h"
36#include "ustr_imp.h"
37#include "ulocimp.h"
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
41#include "ucln_cmn.h"
42#include "locmap.h"
43#include "uarrsort.h"
44#include "uenumimp.h"
45#include "uassert.h"
46
47#include <stdio.h> /* for sprintf */
48
49/* ### Declarations **************************************************/
50
/*
 * Locale stuff from locid.cpp.
 *
 * Forward declarations only (C linkage via U_CFUNC).  locale_getKeywords
 * takes the same leading parameters as the static helper _getKeywords,
 * without the addKeyword/addValue pair.
 */
U_CFUNC void locale_set_default(const char *id);
U_CFUNC const char *locale_get_default(void);
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
            char prev,
            char *keywords, int32_t keywordCapacity,
            char *values, int32_t valuesCapacity, int32_t *valLen,
            UBool valuesToo,
            UErrorCode *status);
61
62/* ### Data tables **************************************************/
63
64/**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible.  Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order.  This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry.  The first list is visible to
77 * user code when this array is returned by API.  The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066.  This
86 * is compatible with prior ICU codes
87 *
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates.  This avoids bad searches going from 3 to 2 character
91 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
95/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96/* ISO639 table version is 20130123 */
static const char * const LANGUAGES[] = {
    /* First list: sorted codes; this is the part visible to user code. */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
    "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
    "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
    "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
    "aus", "av",  "awa", "ay",  "az",
    "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
    "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
    "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
    "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
    "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
    "csb", "cu",  "cus", "cv",  "cy",
    "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "dv",  "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
    "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
    "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "guz", "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
    "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
    "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
    "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
    "is",  "it",  "iu",
    "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
    "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
    "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
    "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
    "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
    "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
    "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
    "mwl", "mwr", "my",  "mye", "myn", "myv",
    "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
    "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
    "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
    "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
    "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
    "pt",
    "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
    "rom", "ru",  "rup", "rw",  "rwk",
    "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
    "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
    "shi", "shn", "shu", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
    "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
    "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
    "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vi",  "vo",  "vot", "vun",
    "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
    "xal", "xh",  "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
    "za",  "zap", "zbl", "zen", "zh",  "znd", "zu",  "zun",
    "zxx", "zza",
/* Terminator of the first list. */
NULL,
    /* Second list: supported codes that are not exposed through user API. */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
/* Terminator of the second list. */
NULL
};
182
/*
 * Deprecated 2-letter language codes and their replacements.
 * The two arrays have the same number of entries and each ends with two
 * NULLs.  NOTE(review): the entries look index-aligned (in->id, iw->he,
 * ji->yi, jw->jv); confirm against the lookup code that consumes them.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
189
190/**
191 * Table of 3-letter language codes.
192 *
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible.  It must be kept in sync
195 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i].  The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
198 *
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 *
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry.  The two lists correspond to
204 * the two lists in LANGUAGES.
205 */
206/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207/* ISO639 table version is 20130123 */
static const char * const LANGUAGES_3[] = {
    /* First list: entry i is the 3-letter form of LANGUAGES[i]. */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
    "aus", "ava", "awa", "aym", "aze",
    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
    "btk", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
    "csb", "chu", "cus", "chv", "cym",
    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
    "div", "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
    "enm", "epo", "spa", "est", "eus", "ewo",
    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
    "fry",
    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "guj", "guz", "glv", "gwi",
    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
    "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
    "isl", "ita", "iku",
    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
    "mwl", "mwr", "mya", "mye", "myn", "myv",
    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
    "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
    "por",
    "que",
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
    "rom", "rus", "rup", "kin", "rwk",
    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
    "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vie", "vol", "vot", "vun",
    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
    "xal", "xho", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
    "zha", "zap", "zbl", "zen", "zho", "znd", "zul", "zun",
    "zxx", "zza",
/* Terminator of the first list. */
NULL,
/* Second list: 3-letter forms of the obsolete codes in LANGUAGES' second list. */
/* NOTE(review): "jw" maps to "jaw" here while "jv" maps to "jav" above; confirm this is intentional. */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
/* Terminator of the second list. */
NULL
};
294
295/**
296 * Table of 2-letter country codes.
297 *
298 * This list must be in sorted order.  This list is returned directly
299 * to the user by some API.
300 *
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
302 * entries matched.
303 *
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry.  The first list is visible to
306 * user code when this array is returned by API.  The second list
307 * contains codes we support, but do not expose through user API.
308 *
309 * Notes:
310 *
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
315 *
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 */
static const char * const COUNTRIES[] = {
    /* First list: sorted codes; this is the part visible to user code. */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
/* Terminator of the first list. */
NULL,
    /* Second list: supported codes that are not exposed through user API. */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
/* Terminator of the second list. */
NULL
};
354
/*
 * Deprecated 2-letter country codes and their replacements.
 * The arrays are index-aligned: the commented-out row inside
 * REPLACEMENT_COUNTRIES repeats DEPRECATED_COUNTRIES so each replacement can
 * be read directly under the code it replaces.  Each array ends with two NULLs.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
362
363/**
364 * Table of 3-letter country codes.
365 *
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
371 *
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry.  The two lists correspond to
374 * the two lists in COUNTRIES.
375 */
static const char * const COUNTRIES_3[] = {
/* First list: each commented row repeats COUNTRIES; the row below it holds the matching 3-letter codes. */
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
/* Terminator of the first list. */
NULL,
/* Second list: 3-letter forms of the obsolete codes in COUNTRIES' second list. */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
/* Terminator of the second list. */
NULL
};
442
/*
 * One row of CANONICALIZE_MAP: an input locale ID, the ID it is rewritten
 * to, and an optional keyword/value pair that accompanies the rewritten ID.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} CanonicalizationMap;
449
450/**
451 * A map to canonicalize locale IDs.  This handles a variety of
452 * different semantic kinds of transformations.
453 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* Columns: input id, canonical id, keyword (or NULL), keyword value (or NULL). */
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
};
502
/*
 * One row of VARIANT_MAP: a variant string paired with the keyword and
 * keyword value associated with it.
 */
typedef struct VariantMap {
    const char *variant;          /* input variant string */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} VariantMap;
508
/* Variant strings that have a keyword=value equivalent. */
static const VariantMap VARIANT_MAP[] = {
    /* Columns: variant, keyword, keyword value. */
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
514
515/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 *
 * Evaluates to true when id is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) is 1.
 *
 * The macro now tests its own argument; it previously referred to a variable
 * named localeID in the caller's scope regardless of what was passed in.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * On success finalID is set to buffer (which then holds the converted ID);
 * otherwise finalID is set to id.
 *
 * A conversion that ends with U_STRING_NOT_TERMINATED_WARNING is treated as a
 * failure: that code is a warning, so U_FAILURE() does not catch it, yet the
 * buffer has no terminating NUL and must not be used as a C string.  In that
 * case *err is changed to U_BUFFER_OVERFLOW_ERROR so the caller sees the
 * problem.
 *
 * This expands to a bare if/else statement: use it as a complete statement
 * inside braces, never as the unbraced body of another if.  Arguments are
 * evaluated more than once.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
                U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
            finalID=id; \
            if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
        } else { \
            finalID=buffer; \
        }
525/* Gets the size of the shortest subtag in the given localeID. */
526static int32_t getShortestSubtagLength(const char *localeID) {
527    int32_t localeIDLength = uprv_strlen(localeID);
528    int32_t length = localeIDLength;
529    int32_t tmpLength = 0;
530    int32_t i;
531    UBool reset = TRUE;
532
533    for (i = 0; i < localeIDLength; i++) {
534        if (localeID[i] != '_' && localeID[i] != '-') {
535            if (reset) {
536                tmpLength = 0;
537                reset = FALSE;
538            }
539            tmpLength++;
540        } else {
541            if (tmpLength != 0 && tmpLength < length) {
542                length = tmpLength;
543            }
544            reset = TRUE;
545        }
546    }
547
548    return length;
549}
550
551/* ### Keywords **************************************************/
552
/* Size of KeywordStruct.keyword, including the terminating NUL; keyword
   names of this length or longer are rejected. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Number of entries in the keywordList array that _getKeywords fills. */
#define ULOC_MAX_NO_KEYWORDS 25
555
556U_CAPI const char * U_EXPORT2
557locale_getKeywordsStart(const char *localeID) {
558    const char *result = NULL;
559    if((result = uprv_strchr(localeID, '@')) != NULL) {
560        return result;
561    }
562#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563    else {
564        /* We do this because the @ sign is variant, and the @ sign used on one
565        EBCDIC machine won't be compiled the same way on other EBCDIC based
566        machines. */
567        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
568        const uint8_t *charToFind = ebcdicSigns;
569        while(*charToFind) {
570            if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
571                return result;
572            }
573            charToFind++;
574        }
575    }
576#endif
577    return NULL;
578}
579
580/**
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
585 */
586static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
587{
588  int32_t i;
589  int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
590
591  if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
592    /* keyword name too long for internal buffer */
593    *status = U_INTERNAL_PROGRAM_ERROR;
594          return 0;
595  }
596
597  /* normalize the keyword name */
598  for(i = 0; i < keywordNameLen; i++) {
599    buf[i] = uprv_tolower(keywordName[i]);
600  }
601  buf[i] = 0;
602
603  return keywordNameLen;
604}
605
/* One keyword/value pair collected by _getKeywords while scanning a locale ID. */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* NUL-terminated keyword: lowercased, spaces removed */
    int32_t keywordLen;                    /* length of keyword, not counting the NUL */
    const char *valueStart;                /* first non-space character of the value, inside the locale ID string */
    int32_t valueLen;                      /* length of the value with trailing spaces excluded */
} KeywordStruct;
612
613static int32_t U_CALLCONV
614compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
615    const char* leftString = ((const KeywordStruct *)left)->keyword;
616    const char* rightString = ((const KeywordStruct *)right)->keyword;
617    return uprv_strcmp(leftString, rightString);
618}
619
620/**
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
625 */
626static int32_t
627_getKeywords(const char *localeID,
628             char prev,
629             char *keywords, int32_t keywordCapacity,
630             char *values, int32_t valuesCapacity, int32_t *valLen,
631             UBool valuesToo,
632             const char* addKeyword,
633             const char* addValue,
634             UErrorCode *status)
635{
636    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
637
638    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
639    int32_t numKeywords = 0;
640    const char* pos = localeID;
641    const char* equalSign = NULL;
642    const char* semicolon = NULL;
643    int32_t i = 0, j, n;
644    int32_t keywordsLen = 0;
645    int32_t valuesLen = 0;
646
647    if(prev == '@') { /* start of keyword definition */
648        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649        do {
650            UBool duplicate = FALSE;
651            /* skip leading spaces */
652            while(*pos == ' ') {
653                pos++;
654            }
655            if (!*pos) { /* handle trailing "; " */
656                break;
657            }
658            if(numKeywords == maxKeywords) {
659                *status = U_INTERNAL_PROGRAM_ERROR;
660                return 0;
661            }
662            equalSign = uprv_strchr(pos, '=');
663            semicolon = uprv_strchr(pos, ';');
664            /* lack of '=' [foo@currency] is illegal */
665            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666            if(!equalSign || (semicolon && semicolon<equalSign)) {
667                *status = U_INVALID_FORMAT_ERROR;
668                return 0;
669            }
670            /* need to normalize both keyword and keyword name */
671            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
672                /* keyword name too long for internal buffer */
673                *status = U_INTERNAL_PROGRAM_ERROR;
674                return 0;
675            }
676            for(i = 0, n = 0; i < equalSign - pos; ++i) {
677                if (pos[i] != ' ') {
678                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
679                }
680            }
681            keywordList[numKeywords].keyword[n] = 0;
682            keywordList[numKeywords].keywordLen = n;
683            /* now grab the value part. First we skip the '=' */
684            equalSign++;
685            /* then we leading spaces */
686            while(*equalSign == ' ') {
687                equalSign++;
688            }
689            keywordList[numKeywords].valueStart = equalSign;
690
691            pos = semicolon;
692            i = 0;
693            if(pos) {
694                while(*(pos - i - 1) == ' ') {
695                    i++;
696                }
697                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
698                pos++;
699            } else {
700                i = (int32_t)uprv_strlen(equalSign);
701                while(i && equalSign[i-1] == ' ') {
702                    i--;
703                }
704                keywordList[numKeywords].valueLen = i;
705            }
706            /* If this is a duplicate keyword, then ignore it */
707            for (j=0; j<numKeywords; ++j) {
708                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
709                    duplicate = TRUE;
710                    break;
711                }
712            }
713            if (!duplicate) {
714                ++numKeywords;
715            }
716        } while(pos);
717
718        /* Handle addKeyword/addValue. */
719        if (addKeyword != NULL) {
720            UBool duplicate = FALSE;
721            U_ASSERT(addValue != NULL);
722            /* Search for duplicate; if found, do nothing. Explicit keyword
723               overrides addKeyword. */
724            for (j=0; j<numKeywords; ++j) {
725                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
726                    duplicate = TRUE;
727                    break;
728                }
729            }
730            if (!duplicate) {
731                if (numKeywords == maxKeywords) {
732                    *status = U_INTERNAL_PROGRAM_ERROR;
733                    return 0;
734                }
735                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
736                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
737                keywordList[numKeywords].valueStart = addValue;
738                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
739                ++numKeywords;
740            }
741        } else {
742            U_ASSERT(addValue == NULL);
743        }
744
745        /* now we have a list of keywords */
746        /* we need to sort it */
747        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
748
749        /* Now construct the keyword part */
750        for(i = 0; i < numKeywords; i++) {
751            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
752                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
753                if(valuesToo) {
754                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
755                } else {
756                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
757                }
758            }
759            keywordsLen += keywordList[i].keywordLen + 1;
760            if(valuesToo) {
761                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
762                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
763                }
764                keywordsLen += keywordList[i].valueLen;
765
766                if(i < numKeywords - 1) {
767                    if(keywordsLen < keywordCapacity) {
768                        keywords[keywordsLen] = ';';
769                    }
770                    keywordsLen++;
771                }
772            }
773            if(values) {
774                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
775                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
776                    values[valuesLen + keywordList[i].valueLen] = 0;
777                }
778                valuesLen += keywordList[i].valueLen + 1;
779            }
780        }
781        if(values) {
782            values[valuesLen] = 0;
783            if(valLen) {
784                *valLen = valuesLen;
785            }
786        }
787        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
788    } else {
789        return 0;
790    }
791}
792
793U_CFUNC int32_t
794locale_getKeywords(const char *localeID,
795                   char prev,
796                   char *keywords, int32_t keywordCapacity,
797                   char *values, int32_t valuesCapacity, int32_t *valLen,
798                   UBool valuesToo,
799                   UErrorCode *status) {
800    return _getKeywords(localeID, prev, keywords, keywordCapacity,
801                        values, valuesCapacity, valLen, valuesToo,
802                        NULL, NULL, status);
803}
804
805U_CAPI int32_t U_EXPORT2
806uloc_getKeywordValue(const char* localeID,
807                     const char* keywordName,
808                     char* buffer, int32_t bufferCapacity,
809                     UErrorCode* status)
810{
811    const char* startSearchHere = NULL;
812    const char* nextSeparator = NULL;
813    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
814    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
815    int32_t i = 0;
816    int32_t result = 0;
817
818    if(status && U_SUCCESS(*status) && localeID) {
819      char tempBuffer[ULOC_FULLNAME_CAPACITY];
820      const char* tmpLocaleID;
821
822      if (_hasBCP47Extension(localeID)) {
823          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
824      } else {
825          tmpLocaleID=localeID;
826      }
827
828      startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
829      if(startSearchHere == NULL) {
830          /* no keywords, return at once */
831          return 0;
832      }
833
834      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
835      if(U_FAILURE(*status)) {
836        return 0;
837      }
838
839      /* find the first keyword */
840      while(startSearchHere) {
841          startSearchHere++;
842          /* skip leading spaces (allowed?) */
843          while(*startSearchHere == ' ') {
844              startSearchHere++;
845          }
846          nextSeparator = uprv_strchr(startSearchHere, '=');
847          /* need to normalize both keyword and keyword name */
848          if(!nextSeparator) {
849              break;
850          }
851          if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
852              /* keyword name too long for internal buffer */
853              *status = U_INTERNAL_PROGRAM_ERROR;
854              return 0;
855          }
856          for(i = 0; i < nextSeparator - startSearchHere; i++) {
857              localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
858          }
859          /* trim trailing spaces */
860          while(startSearchHere[i-1] == ' ') {
861              i--;
862              U_ASSERT(i>=0);
863          }
864          localeKeywordNameBuffer[i] = 0;
865
866          startSearchHere = uprv_strchr(nextSeparator, ';');
867
868          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
869              nextSeparator++;
870              while(*nextSeparator == ' ') {
871                  nextSeparator++;
872              }
873              /* we actually found the keyword. Copy the value */
874              if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
875                  while(*(startSearchHere-1) == ' ') {
876                      startSearchHere--;
877                  }
878                  uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
879                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
880              } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
881                  i = (int32_t)uprv_strlen(nextSeparator);
882                  while(nextSeparator[i - 1] == ' ') {
883                      i--;
884                  }
885                  uprv_strncpy(buffer, nextSeparator, i);
886                  result = u_terminateChars(buffer, bufferCapacity, i, status);
887              } else {
888                  /* give a bigger buffer, please */
889                  *status = U_BUFFER_OVERFLOW_ERROR;
890                  if(startSearchHere) {
891                      result = (int32_t)(startSearchHere - nextSeparator);
892                  } else {
893                      result = (int32_t)uprv_strlen(nextSeparator);
894                  }
895              }
896              return result;
897          }
898      }
899    }
900    return 0;
901}
902
903U_CAPI int32_t U_EXPORT2
904uloc_setKeywordValue(const char* keywordName,
905                     const char* keywordValue,
906                     char* buffer, int32_t bufferCapacity,
907                     UErrorCode* status)
908{
909    /* TODO: sorting. removal. */
910    int32_t keywordNameLen;
911    int32_t keywordValueLen;
912    int32_t bufLen;
913    int32_t needLen = 0;
914    int32_t foundValueLen;
915    int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
916    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
917    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
918    int32_t i = 0;
919    int32_t rc;
920    char* nextSeparator = NULL;
921    char* nextEqualsign = NULL;
922    char* startSearchHere = NULL;
923    char* keywordStart = NULL;
924    char *insertHere = NULL;
925    if(U_FAILURE(*status)) {
926        return -1;
927    }
928    if(bufferCapacity>1) {
929        bufLen = (int32_t)uprv_strlen(buffer);
930    } else {
931        *status = U_ILLEGAL_ARGUMENT_ERROR;
932        return 0;
933    }
934    if(bufferCapacity<bufLen) {
935        /* The capacity is less than the length?! Is this NULL terminated? */
936        *status = U_ILLEGAL_ARGUMENT_ERROR;
937        return 0;
938    }
939    if(keywordValue && !*keywordValue) {
940        keywordValue = NULL;
941    }
942    if(keywordValue) {
943        keywordValueLen = (int32_t)uprv_strlen(keywordValue);
944    } else {
945        keywordValueLen = 0;
946    }
947    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
948    if(U_FAILURE(*status)) {
949        return 0;
950    }
951    startSearchHere = (char*)locale_getKeywordsStart(buffer);
952    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
953        if(!keywordValue) { /* no keywords = nothing to remove */
954            return bufLen;
955        }
956
957        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
958        if(startSearchHere) { /* had a single @ */
959            needLen--; /* already had the @ */
960            /* startSearchHere points at the @ */
961        } else {
962            startSearchHere=buffer+bufLen;
963        }
964        if(needLen >= bufferCapacity) {
965            *status = U_BUFFER_OVERFLOW_ERROR;
966            return needLen; /* no change */
967        }
968        *startSearchHere = '@';
969        startSearchHere++;
970        uprv_strcpy(startSearchHere, keywordNameBuffer);
971        startSearchHere += keywordNameLen;
972        *startSearchHere = '=';
973        startSearchHere++;
974        uprv_strcpy(startSearchHere, keywordValue);
975        startSearchHere+=keywordValueLen;
976        return needLen;
977    } /* end shortcut - no @ */
978
979    keywordStart = startSearchHere;
980    /* search for keyword */
981    while(keywordStart) {
982        keywordStart++;
983        /* skip leading spaces (allowed?) */
984        while(*keywordStart == ' ') {
985            keywordStart++;
986        }
987        nextEqualsign = uprv_strchr(keywordStart, '=');
988        /* need to normalize both keyword and keyword name */
989        if(!nextEqualsign) {
990            break;
991        }
992        if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
993            /* keyword name too long for internal buffer */
994            *status = U_INTERNAL_PROGRAM_ERROR;
995            return 0;
996        }
997        for(i = 0; i < nextEqualsign - keywordStart; i++) {
998            localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
999        }
1000        /* trim trailing spaces */
1001        while(keywordStart[i-1] == ' ') {
1002            i--;
1003        }
1004        U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1005        localeKeywordNameBuffer[i] = 0;
1006
1007        nextSeparator = uprv_strchr(nextEqualsign, ';');
1008        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1009        if(rc == 0) {
1010            nextEqualsign++;
1011            while(*nextEqualsign == ' ') {
1012                nextEqualsign++;
1013            }
1014            /* we actually found the keyword. Change the value */
1015            if (nextSeparator) {
1016                keywordAtEnd = 0;
1017                foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1018            } else {
1019                keywordAtEnd = 1;
1020                foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1021            }
1022            if(keywordValue) { /* adding a value - not removing */
1023              if(foundValueLen == keywordValueLen) {
1024                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1025                return bufLen; /* no change in size */
1026              } else if(foundValueLen > keywordValueLen) {
1027                int32_t delta = foundValueLen - keywordValueLen;
1028                if(nextSeparator) { /* RH side */
1029                  uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1030                }
1031                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1032                bufLen -= delta;
1033                buffer[bufLen]=0;
1034                return bufLen;
1035              } else { /* FVL < KVL */
1036                int32_t delta = keywordValueLen - foundValueLen;
1037                if((bufLen+delta) >= bufferCapacity) {
1038                  *status = U_BUFFER_OVERFLOW_ERROR;
1039                  return bufLen+delta;
1040                }
1041                if(nextSeparator) { /* RH side */
1042                  uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1043                }
1044                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1045                bufLen += delta;
1046                buffer[bufLen]=0;
1047                return bufLen;
1048              }
1049            } else { /* removing a keyword */
1050              if(keywordAtEnd) {
1051                /* zero out the ';' or '@' just before startSearchhere */
1052                keywordStart[-1] = 0;
1053                return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1054              } else {
1055                uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1056                keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1057                return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1058              }
1059            }
1060        } else if(rc<0){ /* end match keyword */
1061          /* could insert at this location. */
1062          insertHere = keywordStart;
1063        }
1064        keywordStart = nextSeparator;
1065    } /* end loop searching */
1066
1067    if(!keywordValue) {
1068      return bufLen; /* removal of non-extant keyword - no change */
1069    }
1070
1071    /* we know there is at least one keyword. */
1072    needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1073    if(needLen >= bufferCapacity) {
1074        *status = U_BUFFER_OVERFLOW_ERROR;
1075        return needLen; /* no change */
1076    }
1077
1078    if(insertHere) {
1079      uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1080      keywordStart = insertHere;
1081    } else {
1082      keywordStart = buffer+bufLen;
1083      *keywordStart = ';';
1084      keywordStart++;
1085    }
1086    uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1087    keywordStart += keywordNameLen;
1088    *keywordStart = '=';
1089    keywordStart++;
1090    uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1091    keywordStart+=keywordValueLen;
1092    if(insertHere) {
1093      *keywordStart = ';';
1094      keywordStart++;
1095    }
1096    buffer[needLen]=0;
1097    return needLen;
1098}
1099
1100/* ### ID parsing implementation **************************************************/
1101
/* TRUE if 'a' is one of the letters that may start a special prefix.
 * Arguments are fully parenthesized so that an expression argument
 * (e.g. a conditional expression) is compared as a whole; note that the
 * argument is still evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1112
1113static char* _strnchr(const char* str, int32_t len, char c) {
1114    U_ASSERT(str != 0 && len >= 0);
1115    while (len-- != 0) {
1116        char d = *str;
1117        if (d == c) {
1118            return (char*) str;
1119        } else if (d == 0) {
1120            break;
1121        }
1122        ++str;
1123    }
1124    return NULL;
1125}
1126
1127/**
1128 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1129 * a NULL entry, followed by more entries, and a second NULL entry.
1130 *
1131 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1132 * COUNTRIES_3.
1133 */
1134static int16_t _findIndex(const char* const* list, const char* key)
1135{
1136    const char* const* anchor = list;
1137    int32_t pass = 0;
1138
1139    /* Make two passes through two NULL-terminated arrays at 'list' */
1140    while (pass++ < 2) {
1141        while (*list) {
1142            if (uprv_strcmp(key, *list) == 0) {
1143                return (int16_t)(list - anchor);
1144            }
1145            list++;
1146        }
1147        ++list;     /* skip final NULL *CWB*/
1148    }
1149    return -1;
1150}
1151
1152/* count the length of src while copying it to dest; return strlen(src) */
1153static inline int32_t
1154_copyCount(char *dest, int32_t destCapacity, const char *src) {
1155    const char *anchor;
1156    char c;
1157
1158    anchor=src;
1159    for(;;) {
1160        if((c=*src)==0) {
1161            return (int32_t)(src-anchor);
1162        }
1163        if(destCapacity<=0) {
1164            return (int32_t)((src-anchor)+uprv_strlen(src));
1165        }
1166        ++src;
1167        *dest++=c;
1168        --destCapacity;
1169    }
1170}
1171
1172U_CFUNC const char*
1173uloc_getCurrentCountryID(const char* oldID){
1174    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1175    if (offset >= 0) {
1176        return REPLACEMENT_COUNTRIES[offset];
1177    }
1178    return oldID;
1179}
1180U_CFUNC const char*
1181uloc_getCurrentLanguageID(const char* oldID){
1182    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1183    if (offset >= 0) {
1184        return REPLACEMENT_LANGUAGES[offset];
1185    }
1186    return oldID;
1187}
1188/*
1189 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1190 * avoid duplicating code to handle the earlier locale ID pieces
1191 * in the functions for the later ones by
1192 * setting the *pEnd pointer to where they stopped parsing
1193 *
1194 * TODO try to use this in Locale
1195 */
1196U_CFUNC int32_t
1197ulocimp_getLanguage(const char *localeID,
1198                    char *language, int32_t languageCapacity,
1199                    const char **pEnd) {
1200    int32_t i=0;
1201    int32_t offset;
1202    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1203
1204    /* if it starts with i- or x- then copy that prefix */
1205    if(_isIDPrefix(localeID)) {
1206        if(i<languageCapacity) {
1207            language[i]=(char)uprv_tolower(*localeID);
1208        }
1209        if(i<languageCapacity) {
1210            language[i+1]='-';
1211        }
1212        i+=2;
1213        localeID+=2;
1214    }
1215
1216    /* copy the language as far as possible and count its length */
1217    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1218        if(i<languageCapacity) {
1219            language[i]=(char)uprv_tolower(*localeID);
1220        }
1221        if(i<3) {
1222            U_ASSERT(i>=0);
1223            lang[i]=(char)uprv_tolower(*localeID);
1224        }
1225        i++;
1226        localeID++;
1227    }
1228
1229    if(i==3) {
1230        /* convert 3 character code to 2 character code if possible *CWB*/
1231        offset=_findIndex(LANGUAGES_3, lang);
1232        if(offset>=0) {
1233            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1234        }
1235    }
1236
1237    if(pEnd!=NULL) {
1238        *pEnd=localeID;
1239    }
1240    return i;
1241}
1242
1243U_CFUNC int32_t
1244ulocimp_getScript(const char *localeID,
1245                  char *script, int32_t scriptCapacity,
1246                  const char **pEnd)
1247{
1248    int32_t idLen = 0;
1249
1250    if (pEnd != NULL) {
1251        *pEnd = localeID;
1252    }
1253
1254    /* copy the second item as far as possible and count its length */
1255    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1256            && uprv_isASCIILetter(localeID[idLen])) {
1257        idLen++;
1258    }
1259
1260    /* If it's exactly 4 characters long, then it's a script and not a country. */
1261    if (idLen == 4) {
1262        int32_t i;
1263        if (pEnd != NULL) {
1264            *pEnd = localeID+idLen;
1265        }
1266        if(idLen > scriptCapacity) {
1267            idLen = scriptCapacity;
1268        }
1269        if (idLen >= 1) {
1270            script[0]=(char)uprv_toupper(*(localeID++));
1271        }
1272        for (i = 1; i < idLen; i++) {
1273            script[i]=(char)uprv_tolower(*(localeID++));
1274        }
1275    }
1276    else {
1277        idLen = 0;
1278    }
1279    return idLen;
1280}
1281
1282U_CFUNC int32_t
1283ulocimp_getCountry(const char *localeID,
1284                   char *country, int32_t countryCapacity,
1285                   const char **pEnd)
1286{
1287    int32_t idLen=0;
1288    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1289    int32_t offset;
1290
1291    /* copy the country as far as possible and count its length */
1292    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1293        if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1294            cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1295        }
1296        idLen++;
1297    }
1298
1299    /* the country should be either length 2 or 3 */
1300    if (idLen == 2 || idLen == 3) {
1301        UBool gotCountry = FALSE;
1302        /* convert 3 character code to 2 character code if possible *CWB*/
1303        if(idLen==3) {
1304            offset=_findIndex(COUNTRIES_3, cnty);
1305            if(offset>=0) {
1306                idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1307                gotCountry = TRUE;
1308            }
1309        }
1310        if (!gotCountry) {
1311            int32_t i = 0;
1312            for (i = 0; i < idLen; i++) {
1313                if (i < countryCapacity) {
1314                    country[i]=(char)uprv_toupper(localeID[i]);
1315                }
1316            }
1317        }
1318        localeID+=idLen;
1319    } else {
1320        idLen = 0;
1321    }
1322
1323    if(pEnd!=NULL) {
1324        *pEnd=localeID;
1325    }
1326
1327    return idLen;
1328}
1329
1330/**
1331 * @param needSeparator if true, then add leading '_' if any variants
1332 * are added to 'variant'
1333 */
1334static int32_t
1335_getVariantEx(const char *localeID,
1336              char prev,
1337              char *variant, int32_t variantCapacity,
1338              UBool needSeparator) {
1339    int32_t i=0;
1340
1341    /* get one or more variant tags and separate them with '_' */
1342    if(_isIDSeparator(prev)) {
1343        /* get a variant string after a '-' or '_' */
1344        while(!_isTerminator(*localeID)) {
1345            if (needSeparator) {
1346                if (i<variantCapacity) {
1347                    variant[i] = '_';
1348                }
1349                ++i;
1350                needSeparator = FALSE;
1351            }
1352            if(i<variantCapacity) {
1353                variant[i]=(char)uprv_toupper(*localeID);
1354                if(variant[i]=='-') {
1355                    variant[i]='_';
1356                }
1357            }
1358            i++;
1359            localeID++;
1360        }
1361    }
1362
1363    /* if there is no variant tag after a '-' or '_' then look for '@' */
1364    if(i==0) {
1365        if(prev=='@') {
1366            /* keep localeID */
1367        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1368            ++localeID; /* point after the '@' */
1369        } else {
1370            return 0;
1371        }
1372        while(!_isTerminator(*localeID)) {
1373            if (needSeparator) {
1374                if (i<variantCapacity) {
1375                    variant[i] = '_';
1376                }
1377                ++i;
1378                needSeparator = FALSE;
1379            }
1380            if(i<variantCapacity) {
1381                variant[i]=(char)uprv_toupper(*localeID);
1382                if(variant[i]=='-' || variant[i]==',') {
1383                    variant[i]='_';
1384                }
1385            }
1386            i++;
1387            localeID++;
1388        }
1389    }
1390
1391    return i;
1392}
1393
1394static int32_t
1395_getVariant(const char *localeID,
1396            char prev,
1397            char *variant, int32_t variantCapacity) {
1398    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1399}
1400
1401/**
1402 * Delete ALL instances of a variant from the given list of one or
1403 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1404 * @param variants the source string of one or more variants,
1405 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1406 * terminated; if it is, trailing zero will NOT be maintained.
1407 * @param variantsLen length of variants
1408 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1409 * or "PREEURO"; not zero terminated
1410 * @param toDeleteLen length of toDelete
1411 * @return number of characters deleted from variants
1412 */
1413static int32_t
1414_deleteVariant(char* variants, int32_t variantsLen,
1415               const char* toDelete, int32_t toDeleteLen)
1416{
1417    int32_t delta = 0; /* number of chars deleted */
1418    for (;;) {
1419        UBool flag = FALSE;
1420        if (variantsLen < toDeleteLen) {
1421            return delta;
1422        }
1423        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1424            (variantsLen == toDeleteLen ||
1425             (flag=(variants[toDeleteLen] == '_'))))
1426        {
1427            int32_t d = toDeleteLen + (flag?1:0);
1428            variantsLen -= d;
1429            delta += d;
1430            if (variantsLen > 0) {
1431                uprv_memmove(variants, variants+d, variantsLen);
1432            }
1433        } else {
1434            char* p = _strnchr(variants, variantsLen, '_');
1435            if (p == NULL) {
1436                return delta;
1437            }
1438            ++p;
1439            variantsLen -= (int32_t)(p - variants);
1440            variants = p;
1441        }
1442    }
1443}
1444
1445/* Keyword enumeration */
1446
/* Per-enumeration state for the keyword enumerator functions below. */
typedef struct UKeywordsContext {
    char *keywords;  /* start of the keyword list; walked as consecutive NUL-terminated strings */
    char *current;   /* position of the next keyword to hand out */
} UKeywordsContext;
1451
1452static void U_CALLCONV
1453uloc_kw_closeKeywords(UEnumeration *enumerator) {
1454    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1455    uprv_free(enumerator->context);
1456    uprv_free(enumerator);
1457}
1458
1459static int32_t U_CALLCONV
1460uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1461    char *kw = ((UKeywordsContext *)en->context)->keywords;
1462    int32_t result = 0;
1463    while(*kw) {
1464        result++;
1465        kw += uprv_strlen(kw)+1;
1466    }
1467    return result;
1468}
1469
1470static const char* U_CALLCONV
1471uloc_kw_nextKeyword(UEnumeration* en,
1472                    int32_t* resultLength,
1473                    UErrorCode* /*status*/) {
1474    const char* result = ((UKeywordsContext *)en->context)->current;
1475    int32_t len = 0;
1476    if(*result) {
1477        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1478        ((UKeywordsContext *)en->context)->current += len+1;
1479    } else {
1480        result = NULL;
1481    }
1482    if (resultLength) {
1483        *resultLength = len;
1484    }
1485    return result;
1486}
1487
1488static void U_CALLCONV
1489uloc_kw_resetKeywords(UEnumeration* en,
1490                      UErrorCode* /*status*/) {
1491    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1492}
1493
/*
 * Template UEnumeration for keyword lists. uloc_openKeywordList() copies
 * this structure into each enumeration it allocates and then fills in the
 * copy's 'context' member with a UKeywordsContext.
 * NOTE(review): the order of the initializers must match the member order
 * of UEnumeration (presumably declared in uenumimp.h) -- confirm there
 * which slot each NULL / function pointer occupies.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1503
1504U_CAPI UEnumeration* U_EXPORT2
1505uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1506{
1507    UKeywordsContext *myContext = NULL;
1508    UEnumeration *result = NULL;
1509
1510    if(U_FAILURE(*status)) {
1511        return NULL;
1512    }
1513    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1514    /* Null pointer test */
1515    if (result == NULL) {
1516        *status = U_MEMORY_ALLOCATION_ERROR;
1517        return NULL;
1518    }
1519    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1520    myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1521    if (myContext == NULL) {
1522        *status = U_MEMORY_ALLOCATION_ERROR;
1523        uprv_free(result);
1524        return NULL;
1525    }
1526    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1527    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1528    myContext->keywords[keywordListSize] = 0;
1529    myContext->current = myContext->keywords;
1530    result->context = myContext;
1531    return result;
1532}
1533
1534U_CAPI UEnumeration* U_EXPORT2
1535uloc_openKeywords(const char* localeID,
1536                        UErrorCode* status)
1537{
1538    int32_t i=0;
1539    char keywords[256];
1540    int32_t keywordsCapacity = 256;
1541    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1542    const char* tmpLocaleID;
1543
1544    if(status==NULL || U_FAILURE(*status)) {
1545        return 0;
1546    }
1547
1548    if (_hasBCP47Extension(localeID)) {
1549        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1550    } else {
1551        if (localeID==NULL) {
1552           localeID=uloc_getDefault();
1553        }
1554        tmpLocaleID=localeID;
1555    }
1556
1557    /* Skip the language */
1558    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1559    if(_isIDSeparator(*tmpLocaleID)) {
1560        const char *scriptID;
1561        /* Skip the script if available */
1562        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1563        if(scriptID != tmpLocaleID+1) {
1564            /* Found optional script */
1565            tmpLocaleID = scriptID;
1566        }
1567        /* Skip the Country */
1568        if (_isIDSeparator(*tmpLocaleID)) {
1569            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1570            if(_isIDSeparator(*tmpLocaleID)) {
1571                _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1572            }
1573        }
1574    }
1575
1576    /* keywords are located after '@' */
1577    if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1578        i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1579    }
1580
1581    if(i) {
1582        return uloc_openKeywordList(keywords, i, status);
1583    } else {
1584        return NULL;
1585    }
1586}
1587
1588
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when at least one bit of 'mask' is set in 'options'.  Both arguments
 * are parenthesized so that compound expressions such as (a | b) are tested
 * as a whole instead of being split by operator precedence.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The "i-default" language tag; deliberately stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
/* Number of characters in i_default, as int32_t to match the length variables it is compared with. */
#define I_DEFAULT_LENGTH ((int32_t)(sizeof i_default / sizeof i_default[0]))
1597
/**
 * Canonicalize the given localeID, to level 1 or to level 2,
 * depending on the options.  To specify level 1, pass in options=0.
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
 *
 * This is the code underlying uloc_getName and uloc_canonicalize
 * (and uloc_getBaseName, which passes _ULOC_STRIP_KEYWORDS).
 *
 * @param localeID       ID to process; NULL selects uloc_getDefault().
 * @param result         Destination buffer.  When it is NULL or smaller than
 *                       ULOC_FULLNAME_CAPACITY the ID is assembled in a local
 *                       scratch buffer and copied out at the end.
 * @param resultCapacity Capacity of result.
 * @param options        Bit set of _ULOC_CANONICALIZE and _ULOC_STRIP_KEYWORDS.
 * @param err            In/out error code; nothing is done if it already
 *                       indicates a failure.  Dereferenced unconditionally,
 *                       so it must not be NULL.
 * @return The value of u_terminateChars(result, resultCapacity, len, err),
 *         where len is the length counted for the assembled ID.
 */
static int32_t
_canonicalize(const char* localeID,
              char* result,
              int32_t resultCapacity,
              uint32_t options,
              UErrorCode* err) {
    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
    char localeBuffer[ULOC_FULLNAME_CAPACITY];
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* origLocaleID;
    const char* tmpLocaleID;
    const char* keywordAssign = NULL;
    const char* separatorIndicator = NULL;
    const char* addKeyword = NULL;
    const char* addValue = NULL;
    char* name;
    char* variant = NULL; /* pointer into name, or NULL */

    if (U_FAILURE(*err)) {
        return 0;
    }

    /* _ConvertBCP47 (defined elsewhere) receives tempBuffer as scratch space
       and sets tmpLocaleID; otherwise the input (or the default) is used as is. */
    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    } else {
        if (localeID==NULL) {
           localeID=uloc_getDefault();
        }
        tmpLocaleID=localeID;
    }

    /* Remember the start of the ID; tmpLocaleID is advanced while parsing. */
    origLocaleID=tmpLocaleID;

    /* if we are doing a full canonicalization, then put results in
       localeBuffer, if necessary; otherwise send them to result. */
    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
        name = localeBuffer;
        nameCapacity = (int32_t)sizeof(localeBuffer);
    } else {
        name = result;
        nameCapacity = resultCapacity;
    }

    /* get all pieces, one after another, and separate with '_' */
    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);

    /* "i-default" is replaced by the current default locale ID. */
    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
        const char *d = uloc_getDefault();

        len = (int32_t)uprv_strlen(d);

        /* NOTE(review): this copy is not limited by nameCapacity, unlike the
           other writes into name below -- confirm that the default ID can
           never be longer than the buffer. */
        if (name != NULL) {
            uprv_strncpy(name, d, len);
        }
    } else if(_isIDSeparator(*tmpLocaleID)) {
        const char *scriptID;

        ++fieldCount;
        if(len<nameCapacity) {
            name[len]='_';
        }
        ++len;

        /* From here on len keeps counting even after name is full; writes
           are skipped (NULL destination) once len reaches nameCapacity. */
        scriptSize=ulocimp_getScript(tmpLocaleID+1,
            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
        if(scriptSize > 0) {
            /* Found optional script */
            tmpLocaleID = scriptID;
            ++fieldCount;
            len+=scriptSize;
            if (_isIDSeparator(*tmpLocaleID)) {
                /* If there is something else, then we add the _ */
                if(len<nameCapacity) {
                    name[len]='_';
                }
                ++len;
            }
        }

        if (_isIDSeparator(*tmpLocaleID)) {
            const char *cntryID;
            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
            if (cntrySize > 0) {
                /* Found optional country */
                tmpLocaleID = cntryID;
                len+=cntrySize;
            }
            if(_isIDSeparator(*tmpLocaleID)) {
                /* If there is something else, then we add the _  if we found country before. */
                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
                    ++fieldCount;
                    if(len<nameCapacity) {
                        name[len]='_';
                    }
                    ++len;
                }

                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
                if (variantSize > 0) {
                    /* Remember where the variant starts so it can be edited below. */
                    variant = len<nameCapacity ? name+len : NULL;
                    len += variantSize;
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
                }
            }
        }
    }

    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
        UBool done = FALSE;
        do {
            char c = *tmpLocaleID;
            switch (c) {
            case 0:
            case '@':
                done = TRUE;
                break;
            default:
                if (len<nameCapacity) {
                    name[len] = c;
                }
                ++len;
                ++tmpLocaleID;
                break;
            }
        } while (!done);
    }

    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
       After this, tmpLocaleID either points to '@' or is NULL */
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
    }

    /* Copy POSIX-style variant, if any [mr@FOO] */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
        tmpLocaleID != NULL && keywordAssign == NULL) {
        for (;;) {
            char c = *tmpLocaleID;
            if (c == 0) {
                break;
            }
            if (len<nameCapacity) {
                name[len] = c;
            }
            ++len;
            ++tmpLocaleID;
        }
    }

    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
        /* Handle @FOO variant if @ is present and not followed by = */
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
            int32_t posixVariantSize;
            /* Add missing '_' if needed */
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
                do {
                    if(len<nameCapacity) {
                        name[len]='_';
                    }
                    ++len;
                    ++fieldCount;
                } while(fieldCount<2);
            }
            /* NOTE(review): unlike the calls above, name+len is passed here
               without first checking len<nameCapacity -- confirm that
               _getVariantEx copes with a non-positive capacity. */
            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
                                             (UBool)(variantSize > 0));
            if (posixVariantSize > 0) {
                if (variant == NULL) {
                    variant = name+len;
                }
                len += posixVariantSize;
                variantSize += posixVariantSize;
            }
        }

        /* Handle generic variants first */
        if (variant) {
            /* The first VARIANT_MAP entry that _deleteVariant removes from the
               variant supplies the keyword/value pair to append later. */
            for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
                const char* variantToCompare = VARIANT_MAP[j].variant;
                int32_t n = (int32_t)uprv_strlen(variantToCompare);
                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
                len -= variantLen;
                if (variantLen > 0) {
                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
                        --len;
                    }
                    addKeyword = VARIANT_MAP[j].keyword;
                    addValue = VARIANT_MAP[j].value;
                    break;
                }
            }
            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
                --len;
            }
        }

        /* Look up the ID in the canonicalization map */
        for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
            const char* id = CANONICALIZE_MAP[j].id;
            int32_t n = (int32_t)uprv_strlen(id);
            if (len == n && uprv_strncmp(name, id, n) == 0) {
                if (n == 0 && tmpLocaleID != NULL) {
                    break; /* Don't remap "" if keywords present */
                }
                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
                if (CANONICALIZE_MAP[j].keyword) {
                    addKeyword = CANONICALIZE_MAP[j].keyword;
                    addValue = CANONICALIZE_MAP[j].value;
                }
                break;
            }
        }
    }

    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
        /* Keywords are copied only when an '=' exists and no ';' precedes it. */
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
            if(len<nameCapacity) {
                name[len]='@';
            }
            ++len;
            ++fieldCount;
            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
        } else if (addKeyword != NULL) {
            U_ASSERT(addValue != NULL && len < nameCapacity);
            /* inelegant but works -- later make _getKeywords do this? */
            len += _copyCount(name+len, nameCapacity-len, "@");
            len += _copyCount(name+len, nameCapacity-len, addKeyword);
            len += _copyCount(name+len, nameCapacity-len, "=");
            len += _copyCount(name+len, nameCapacity-len, addValue);
        }
    }

    /* If the ID was assembled in the scratch buffer, copy as much of it as
       fits into the caller's buffer; termination is handled just below. */
    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
    }

    return u_terminateChars(result, resultCapacity, len, err);
}
1849
1850/* ### ID parsing API **************************************************/
1851
1852U_CAPI int32_t  U_EXPORT2
1853uloc_getParent(const char*    localeID,
1854               char* parent,
1855               int32_t parentCapacity,
1856               UErrorCode* err)
1857{
1858    const char *lastUnderscore;
1859    int32_t i;
1860
1861    if (U_FAILURE(*err))
1862        return 0;
1863
1864    if (localeID == NULL)
1865        localeID = uloc_getDefault();
1866
1867    lastUnderscore=uprv_strrchr(localeID, '_');
1868    if(lastUnderscore!=NULL) {
1869        i=(int32_t)(lastUnderscore-localeID);
1870    } else {
1871        i=0;
1872    }
1873
1874    if(i>0 && parent != localeID) {
1875        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1876    }
1877    return u_terminateChars(parent, parentCapacity, i, err);
1878}
1879
1880U_CAPI int32_t U_EXPORT2
1881uloc_getLanguage(const char*    localeID,
1882         char* language,
1883         int32_t languageCapacity,
1884         UErrorCode* err)
1885{
1886    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1887    int32_t i=0;
1888
1889    if (err==NULL || U_FAILURE(*err)) {
1890        return 0;
1891    }
1892
1893    if(localeID==NULL) {
1894        localeID=uloc_getDefault();
1895    }
1896
1897    i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1898    return u_terminateChars(language, languageCapacity, i, err);
1899}
1900
1901U_CAPI int32_t U_EXPORT2
1902uloc_getScript(const char*    localeID,
1903         char* script,
1904         int32_t scriptCapacity,
1905         UErrorCode* err)
1906{
1907    int32_t i=0;
1908
1909    if(err==NULL || U_FAILURE(*err)) {
1910        return 0;
1911    }
1912
1913    if(localeID==NULL) {
1914        localeID=uloc_getDefault();
1915    }
1916
1917    /* skip the language */
1918    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1919    if(_isIDSeparator(*localeID)) {
1920        i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1921    }
1922    return u_terminateChars(script, scriptCapacity, i, err);
1923}
1924
1925U_CAPI int32_t  U_EXPORT2
1926uloc_getCountry(const char* localeID,
1927            char* country,
1928            int32_t countryCapacity,
1929            UErrorCode* err)
1930{
1931    int32_t i=0;
1932
1933    if(err==NULL || U_FAILURE(*err)) {
1934        return 0;
1935    }
1936
1937    if(localeID==NULL) {
1938        localeID=uloc_getDefault();
1939    }
1940
1941    /* Skip the language */
1942    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1943    if(_isIDSeparator(*localeID)) {
1944        const char *scriptID;
1945        /* Skip the script if available */
1946        ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1947        if(scriptID != localeID+1) {
1948            /* Found optional script */
1949            localeID = scriptID;
1950        }
1951        if(_isIDSeparator(*localeID)) {
1952            i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1953        }
1954    }
1955    return u_terminateChars(country, countryCapacity, i, err);
1956}
1957
1958U_CAPI int32_t  U_EXPORT2
1959uloc_getVariant(const char* localeID,
1960                char* variant,
1961                int32_t variantCapacity,
1962                UErrorCode* err)
1963{
1964    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1965    const char* tmpLocaleID;
1966    int32_t i=0;
1967
1968    if(err==NULL || U_FAILURE(*err)) {
1969        return 0;
1970    }
1971
1972    if (_hasBCP47Extension(localeID)) {
1973        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1974    } else {
1975        if (localeID==NULL) {
1976           localeID=uloc_getDefault();
1977        }
1978        tmpLocaleID=localeID;
1979    }
1980
1981    /* Skip the language */
1982    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1983    if(_isIDSeparator(*tmpLocaleID)) {
1984        const char *scriptID;
1985        /* Skip the script if available */
1986        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1987        if(scriptID != tmpLocaleID+1) {
1988            /* Found optional script */
1989            tmpLocaleID = scriptID;
1990        }
1991        /* Skip the Country */
1992        if (_isIDSeparator(*tmpLocaleID)) {
1993            const char *cntryID;
1994            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
1995            if (cntryID != tmpLocaleID+1) {
1996                /* Found optional country */
1997                tmpLocaleID = cntryID;
1998            }
1999            if(_isIDSeparator(*tmpLocaleID)) {
2000                /* If there was no country ID, skip a possible extra IDSeparator */
2001                if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2002                    tmpLocaleID++;
2003                }
2004                i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2005            }
2006        }
2007    }
2008
2009    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2010    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2011/*
2012    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2013        i=_getVariant(localeID+1, '@', variant, variantCapacity);
2014    }
2015*/
2016    return u_terminateChars(variant, variantCapacity, i, err);
2017}
2018
2019U_CAPI int32_t  U_EXPORT2
2020uloc_getName(const char* localeID,
2021             char* name,
2022             int32_t nameCapacity,
2023             UErrorCode* err)
2024{
2025    return _canonicalize(localeID, name, nameCapacity, 0, err);
2026}
2027
2028U_CAPI int32_t  U_EXPORT2
2029uloc_getBaseName(const char* localeID,
2030                 char* name,
2031                 int32_t nameCapacity,
2032                 UErrorCode* err)
2033{
2034    return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2035}
2036
2037U_CAPI int32_t  U_EXPORT2
2038uloc_canonicalize(const char* localeID,
2039                  char* name,
2040                  int32_t nameCapacity,
2041                  UErrorCode* err)
2042{
2043    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2044}
2045
2046U_CAPI const char*  U_EXPORT2
2047uloc_getISO3Language(const char* localeID)
2048{
2049    int16_t offset;
2050    char lang[ULOC_LANG_CAPACITY];
2051    UErrorCode err = U_ZERO_ERROR;
2052
2053    if (localeID == NULL)
2054    {
2055        localeID = uloc_getDefault();
2056    }
2057    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2058    if (U_FAILURE(err))
2059        return "";
2060    offset = _findIndex(LANGUAGES, lang);
2061    if (offset < 0)
2062        return "";
2063    return LANGUAGES_3[offset];
2064}
2065
2066U_CAPI const char*  U_EXPORT2
2067uloc_getISO3Country(const char* localeID)
2068{
2069    int16_t offset;
2070    char cntry[ULOC_LANG_CAPACITY];
2071    UErrorCode err = U_ZERO_ERROR;
2072
2073    if (localeID == NULL)
2074    {
2075        localeID = uloc_getDefault();
2076    }
2077    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2078    if (U_FAILURE(err))
2079        return "";
2080    offset = _findIndex(COUNTRIES, cntry);
2081    if (offset < 0)
2082        return "";
2083
2084    return COUNTRIES_3[offset];
2085}
2086
2087U_CAPI uint32_t  U_EXPORT2
2088uloc_getLCID(const char* localeID)
2089{
2090    UErrorCode status = U_ZERO_ERROR;
2091    char       langID[ULOC_FULLNAME_CAPACITY];
2092
2093    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2094    if (U_FAILURE(status)) {
2095        return 0;
2096    }
2097
2098    return uprv_convertToLCID(langID, localeID, &status);
2099}
2100
2101U_CAPI int32_t U_EXPORT2
2102uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2103                UErrorCode *status)
2104{
2105    int32_t length;
2106    const char *posix = uprv_convertToPosix(hostid, status);
2107    if (U_FAILURE(*status) || posix == NULL) {
2108        return 0;
2109    }
2110    length = (int32_t)uprv_strlen(posix);
2111    if (length+1 > localeCapacity) {
2112        *status = U_BUFFER_OVERFLOW_ERROR;
2113    }
2114    else {
2115        uprv_strcpy(locale, posix);
2116    }
2117    return length;
2118}
2119
2120/* ### Default locale **************************************************/
2121
2122U_CAPI const char*  U_EXPORT2
2123uloc_getDefault()
2124{
2125    return locale_get_default();
2126}
2127
2128U_CAPI void  U_EXPORT2
2129uloc_setDefault(const char*   newDefaultLocale,
2130             UErrorCode* err)
2131{
2132    if (U_FAILURE(*err))
2133        return;
2134    /* the error code isn't currently used for anything by this function*/
2135
2136    /* propagate change to C++ */
2137    locale_set_default(newDefaultLocale);
2138}
2139
2140/**
2141 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2142 * to an array of pointers to arrays of char.  All of these pointers are owned
2143 * by ICU-- do not delete them, and do not write through them.  The array is
2144 * terminated with a null pointer.
2145 */
2146U_CAPI const char* const*  U_EXPORT2
2147uloc_getISOLanguages()
2148{
2149    return LANGUAGES;
2150}
2151
2152/**
2153 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2154 * pointer to an array of pointers to arrays of char.  All of these pointers are
2155 * owned by ICU-- do not delete them, and do not write through them.  The array is
2156 * terminated with a null pointer.
2157 */
2158U_CAPI const char* const*  U_EXPORT2
2159uloc_getISOCountries()
2160{
2161    return COUNTRIES;
2162}
2163
2164
2165/* this function to be moved into cstring.c later */
2166static char gDecimal = 0;
2167
2168static /* U_CAPI */
2169double
2170/* U_EXPORT2 */
2171_uloc_strtod(const char *start, char **end) {
2172    char *decimal;
2173    char *myEnd;
2174    char buf[30];
2175    double rv;
2176    if (!gDecimal) {
2177        char rep[5];
2178        /* For machines that decide to change the decimal on you,
2179        and try to be too smart with localization.
2180        This normally should be just a '.'. */
2181        sprintf(rep, "%+1.1f", 1.0);
2182        gDecimal = rep[2];
2183    }
2184
2185    if(gDecimal == '.') {
2186        return uprv_strtod(start, end); /* fall through to OS */
2187    } else {
2188        uprv_strncpy(buf, start, 29);
2189        buf[29]=0;
2190        decimal = uprv_strchr(buf, '.');
2191        if(decimal) {
2192            *decimal = gDecimal;
2193        } else {
2194            return uprv_strtod(start, end); /* no decimal point */
2195        }
2196        rv = uprv_strtod(buf, &myEnd);
2197        if(end) {
2198            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2199        }
2200        return rv;
2201    }
2202}
2203
/* One parsed entry of an HTTP Accept-Language list, as built and sorted by
   uloc_acceptLanguageFromHTTP(). */
typedef struct {
    float q;        /* quality value from the entry's parameter; 1.0 when the entry has none */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID of the entry */
} _acceptLangItem;
2209
2210static int32_t U_CALLCONV
2211uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2212{
2213    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2214    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2215
2216    int32_t rc = 0;
2217    if(bb->q < aa->q) {
2218        rc = -1;  /* A > B */
2219    } else if(bb->q > aa->q) {
2220        rc = 1;   /* A < B */
2221    } else {
2222        rc = 0;   /* A = B */
2223    }
2224
2225    if(rc==0) {
2226        rc = uprv_stricmp(aa->locale, bb->locale);
2227    }
2228
2229#if defined(ULOC_DEBUG)
2230    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2231    aa->locale, aa->q,
2232    bb->locale, bb->q,
2233    rc);*/
2234#endif
2235
2236    return rc;
2237}
2238
2239/*
2240mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2241*/
2242
2243U_CAPI int32_t U_EXPORT2
2244uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2245                            const char *httpAcceptLanguage,
2246                            UEnumeration* availableLocales,
2247                            UErrorCode *status)
2248{
2249    _acceptLangItem *j;
2250    _acceptLangItem smallBuffer[30];
2251    char **strs;
2252    char tmp[ULOC_FULLNAME_CAPACITY +1];
2253    int32_t n = 0;
2254    const char *itemEnd;
2255    const char *paramEnd;
2256    const char *s;
2257    const char *t;
2258    int32_t res;
2259    int32_t i;
2260    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2261    int32_t jSize;
2262    char *tempstr; /* Use for null pointer check */
2263
2264    j = smallBuffer;
2265    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2266    if(U_FAILURE(*status)) {
2267        return -1;
2268    }
2269
2270    for(s=httpAcceptLanguage;s&&*s;) {
2271        while(isspace(*s)) /* eat space at the beginning */
2272            s++;
2273        itemEnd=uprv_strchr(s,',');
2274        paramEnd=uprv_strchr(s,';');
2275        if(!itemEnd) {
2276            itemEnd = httpAcceptLanguage+l; /* end of string */
2277        }
2278        if(paramEnd && paramEnd<itemEnd) {
2279            /* semicolon (;) is closer than end (,) */
2280            t = paramEnd+1;
2281            if(*t=='q') {
2282                t++;
2283            }
2284            while(isspace(*t)) {
2285                t++;
2286            }
2287            if(*t=='=') {
2288                t++;
2289            }
2290            while(isspace(*t)) {
2291                t++;
2292            }
2293            j[n].q = (float)_uloc_strtod(t,NULL);
2294        } else {
2295            /* no semicolon - it's 1.0 */
2296            j[n].q = 1.0f;
2297            paramEnd = itemEnd;
2298        }
2299        j[n].dummy=0;
2300        /* eat spaces prior to semi */
2301        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2302            ;
2303        /* Check for null pointer from uprv_strndup */
2304        tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2305        if (tempstr == NULL) {
2306            *status = U_MEMORY_ALLOCATION_ERROR;
2307            return -1;
2308        }
2309        j[n].locale = tempstr;
2310        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2311        if(strcmp(j[n].locale,tmp)) {
2312            uprv_free(j[n].locale);
2313            j[n].locale=uprv_strdup(tmp);
2314        }
2315#if defined(ULOC_DEBUG)
2316        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2317#endif
2318        n++;
2319        s = itemEnd;
2320        while(*s==',') { /* eat duplicate commas */
2321            s++;
2322        }
2323        if(n>=jSize) {
2324            if(j==smallBuffer) {  /* overflowed the small buffer. */
2325                j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2326                if(j!=NULL) {
2327                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2328                }
2329#if defined(ULOC_DEBUG)
2330                fprintf(stderr,"malloced at size %d\n", jSize);
2331#endif
2332            } else {
2333                j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2334#if defined(ULOC_DEBUG)
2335                fprintf(stderr,"re-alloced at size %d\n", jSize);
2336#endif
2337            }
2338            jSize *= 2;
2339            if(j==NULL) {
2340                *status = U_MEMORY_ALLOCATION_ERROR;
2341                return -1;
2342            }
2343        }
2344    }
2345    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2346    if(U_FAILURE(*status)) {
2347        if(j != smallBuffer) {
2348#if defined(ULOC_DEBUG)
2349            fprintf(stderr,"freeing j %p\n", j);
2350#endif
2351            uprv_free(j);
2352        }
2353        return -1;
2354    }
2355    strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2356    /* Check for null pointer */
2357    if (strs == NULL) {
2358        uprv_free(j); /* Free to avoid memory leak */
2359        *status = U_MEMORY_ALLOCATION_ERROR;
2360        return -1;
2361    }
2362    for(i=0;i<n;i++) {
2363#if defined(ULOC_DEBUG)
2364        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2365#endif
2366        strs[i]=j[i].locale;
2367    }
2368    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2369        (const char**)strs, n, availableLocales, status);
2370    for(i=0;i<n;i++) {
2371        uprv_free(strs[i]);
2372    }
2373    uprv_free(strs);
2374    if(j != smallBuffer) {
2375#if defined(ULOC_DEBUG)
2376        fprintf(stderr,"freeing j %p\n", j);
2377#endif
2378        uprv_free(j);
2379    }
2380    return res;
2381}
2382
2383
2384U_CAPI int32_t U_EXPORT2
2385uloc_acceptLanguage(char *result, int32_t resultAvailable,
2386                    UAcceptResult *outResult, const char **acceptList,
2387                    int32_t acceptListCount,
2388                    UEnumeration* availableLocales,
2389                    UErrorCode *status)
2390{
2391    int32_t i,j;
2392    int32_t len;
2393    int32_t maxLen=0;
2394    char tmp[ULOC_FULLNAME_CAPACITY+1];
2395    const char *l;
2396    char **fallbackList;
2397    if(U_FAILURE(*status)) {
2398        return -1;
2399    }
2400    fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2401    if(fallbackList==NULL) {
2402        *status = U_MEMORY_ALLOCATION_ERROR;
2403        return -1;
2404    }
2405    for(i=0;i<acceptListCount;i++) {
2406#if defined(ULOC_DEBUG)
2407        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2408#endif
2409        while((l=uenum_next(availableLocales, NULL, status))) {
2410#if defined(ULOC_DEBUG)
2411            fprintf(stderr,"  %s\n", l);
2412#endif
2413            len = (int32_t)uprv_strlen(l);
2414            if(!uprv_strcmp(acceptList[i], l)) {
2415                if(outResult) {
2416                    *outResult = ULOC_ACCEPT_VALID;
2417                }
2418#if defined(ULOC_DEBUG)
2419                fprintf(stderr, "MATCH! %s\n", l);
2420#endif
2421                if(len>0) {
2422                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2423                }
2424                for(j=0;j<i;j++) {
2425                    uprv_free(fallbackList[j]);
2426                }
2427                uprv_free(fallbackList);
2428                return u_terminateChars(result, resultAvailable, len, status);
2429            }
2430            if(len>maxLen) {
2431                maxLen = len;
2432            }
2433        }
2434        uenum_reset(availableLocales, status);
2435        /* save off parent info */
2436        if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2437            fallbackList[i] = uprv_strdup(tmp);
2438        } else {
2439            fallbackList[i]=0;
2440        }
2441    }
2442
2443    for(maxLen--;maxLen>0;maxLen--) {
2444        for(i=0;i<acceptListCount;i++) {
2445            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2446#if defined(ULOC_DEBUG)
2447                fprintf(stderr,"Try: [%s]", fallbackList[i]);
2448#endif
2449                while((l=uenum_next(availableLocales, NULL, status))) {
2450#if defined(ULOC_DEBUG)
2451                    fprintf(stderr,"  %s\n", l);
2452#endif
2453                    len = (int32_t)uprv_strlen(l);
2454                    if(!uprv_strcmp(fallbackList[i], l)) {
2455                        if(outResult) {
2456                            *outResult = ULOC_ACCEPT_FALLBACK;
2457                        }
2458#if defined(ULOC_DEBUG)
2459                        fprintf(stderr, "fallback MATCH! %s\n", l);
2460#endif
2461                        if(len>0) {
2462                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2463                        }
2464                        for(j=0;j<acceptListCount;j++) {
2465                            uprv_free(fallbackList[j]);
2466                        }
2467                        uprv_free(fallbackList);
2468                        return u_terminateChars(result, resultAvailable, len, status);
2469                    }
2470                }
2471                uenum_reset(availableLocales, status);
2472
2473                if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2474                    uprv_free(fallbackList[i]);
2475                    fallbackList[i] = uprv_strdup(tmp);
2476                } else {
2477                    uprv_free(fallbackList[i]);
2478                    fallbackList[i]=0;
2479                }
2480            }
2481        }
2482        if(outResult) {
2483            *outResult = ULOC_ACCEPT_FAILED;
2484        }
2485    }
2486    for(i=0;i<acceptListCount;i++) {
2487        uprv_free(fallbackList[i]);
2488    }
2489    uprv_free(fallbackList);
2490    return -1;
2491}
2492
2493/*eof*/
2494