1/*
2**********************************************************************
3*   Copyright (C) 1997-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   04/01/97    aliu        Creation.
13*   08/21/98    stephen     JDK 1.2 sync
14*   12/08/98    rtg         New Locale implementation and C API
15*   03/15/99    damiba      overhaul.
16*   04/06/99    stephen     changed setDefault() to realloc and copy
17*   06/14/99    stephen     Changed calls to ures_open for new params
18*   07/21/99    stephen     Modified setDefault() to propagate to C++
19*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
20*                           brought canonicalization code into line with spec
21*****************************************************************************/
22
23/*
24   POSIX's locale format, from putil.c: [no spaces]
25
26     ll [ _CC ] [ . MM ] [ @ VV]
27
28     l = lang, C = ctry, M = charmap, V = variant
29*/
30
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34
35#include "putilimp.h"
36#include "ustr_imp.h"
37#include "ulocimp.h"
38#include "umutex.h"
39#include "cstring.h"
40#include "cmemory.h"
41#include "ucln_cmn.h"
42#include "locmap.h"
43#include "uarrsort.h"
44#include "uenumimp.h"
45#include "uassert.h"
46
47#include <stdio.h> /* for sprintf */
48
49/* ### Declarations **************************************************/
50
51/* Locale stuff from locid.cpp */
52U_CFUNC void locale_set_default(const char *id);
53U_CFUNC const char *locale_get_default(void);
54U_CFUNC int32_t
55locale_getKeywords(const char *localeID,
56            char prev,
57            char *keywords, int32_t keywordCapacity,
58            char *values, int32_t valuesCapacity, int32_t *valLen,
59            UBool valuesToo,
60            UErrorCode *status);
61
62/* ### Data tables **************************************************/
63
64/**
65 * Table of language codes, both 2- and 3-letter, with preference
66 * given to 2-letter codes where possible.  Includes 3-letter codes
67 * that lack a 2-letter equivalent.
68 *
69 * This list must be in sorted order.  This list is returned directly
70 * to the user by some API.
71 *
72 * This list must be kept in sync with LANGUAGES_3, with corresponding
73 * entries matched.
74 *
75 * This table should be terminated with a NULL entry, followed by a
76 * second list, and another NULL entry.  The first list is visible to
77 * user code when this array is returned by API.  The second list
78 * contains codes we support, but do not expose through user API.
79 *
80 * Notes
81 *
82 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
83 * include the revisions up to 2001/7/27 *CWB*
84 *
85 * The 3 character codes are the terminology codes like RFC 3066.  This
86 * is compatible with prior ICU codes
87 *
88 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
89 * table but now at the end of the table because 3 character codes are
90 * duplicates.  This avoids bad searches going from 3 to 2 character
91 * codes.
92 *
93 * The range qaa-qtz is reserved for local use
94 */
95/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
96/* ISO639 table version is 20130531 */
97static const char * const LANGUAGES[] = {
98    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",
99    "afa", "afh", "agq", "ain", "ak",  "akk", "ale", "alg",
100    "alt", "am",  "an",  "ang", "anp", "apa", "ar",  "arc",
101    "arn", "arp", "art", "arw", "as",  "asa", "ast", "ath",
102    "aus", "av",  "awa", "ay",  "az",
103    "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "bax",
104    "bbj", "be",  "bej", "bem", "ber", "bez", "bfd", "bg",
105    "bh",  "bho", "bi",  "bik", "bin", "bkm", "bla", "bm",
106    "bn",  "bnt", "bo",  "br",  "bra", "brx", "bs",  "bss",
107    "btk", "bua", "bug", "bum", "byn", "byv",
108    "ca",  "cad", "cai", "car", "cau", "cay", "cch", "ce",
109    "ceb", "cel", "cgg", "ch",  "chb", "chg", "chk", "chm",
110    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "co",
111    "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",
112    "csb", "cu",  "cus", "cv",  "cy",
113    "da",  "dak", "dar", "dav", "day", "de",  "del", "den",
114    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
115    "dv",  "dyo", "dyu", "dz",  "dzg",
116    "ebu", "ee",  "efi", "egy", "eka", "el",  "elx", "en",
117    "enm", "eo",  "es",  "et",  "eu",  "ewo",
118    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",
119    "fo",  "fon", "fr",  "frm", "fro", "frr", "frs", "fur",
120    "fy",
121    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
122    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
123    "grc", "gsw", "gu",  "guz", "gv",  "gwi",
124    "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit",
125    "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",
126    "hz",
127    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ijo",
128    "ik",  "ilo", "inc", "ine", "inh", "io",  "ira", "iro",
129    "is",  "it",  "iu",
130    "ja",  "jbo", "jgo", "jmc", "jpr", "jrb", "jv",
131    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
132    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kg",  "kha",
133    "khi", "kho", "khq", "ki",  "kj",  "kk",  "kkj", "kl",
134    "kln", "km",  "kmb", "kn",  "ko",  "kok", "kos", "kpe",
135    "kr",  "krc", "krl", "kro", "kru", "ks",  "ksb", "ksf",
136    "ksh", "ku",  "kum", "kut", "kv",  "kw",  "ky",
137    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lg",
138    "li",  "lkt", "ln",  "lo",  "lol", "loz", "lt",  "lu",
139    "lua", "lui", "lun", "luo", "lus", "luy", "lv",
140    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
141    "mde", "mdf", "mdr", "men", "mer", "mfe", "mg",  "mga",
142    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
143    "mkh", "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh",
144    "mos", "mr",  "ms",  "mt",  "mua", "mul", "mun", "mus",
145    "mwl", "mwr", "my",  "mye", "myn", "myv",
146    "na",  "nah", "nai", "nap", "naq", "nb",  "nd",  "nds",
147    "ne",  "new", "ng",  "nia", "nic", "niu", "nl",  "nmg",
148    "nn",  "nnh", "no",  "nog", "non", "nqo", "nr",  "nso",
149    "nub", "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo",
150    "nzi",
151    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
152    "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo",
153    "phi", "phn", "pi",  "pl",  "pon", "pra", "pro", "ps",
154    "pt",
155    "qu",
156    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rof",
157    "rom", "ru",  "rup", "rw",  "rwk",
158    "sa",  "sad", "sah", "sai", "sal", "sam", "saq", "sas",
159    "sat", "sba", "sbp", "sc",  "scn", "sco", "sd",  "se",
160    "see", "seh", "sel", "sem", "ses", "sg",  "sga", "sgn",
161    "shi", "shn", "shu", "si",  "sid", "sio", "sit",
162    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
163    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
164    "srn", "srr", "ss",  "ssa", "ssy", "st",  "su",  "suk",
165    "sus", "sux", "sv",  "sw",  "swb", "swc", "syc", "syr",
166    "ta",  "tai", "te",  "tem", "teo", "ter", "tet", "tg",
167    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tl",  "tlh",
168    "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",  "trv",
169    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
170    "twq", "ty",  "tyv", "tzm",
171    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
172    "vai", "ve",  "vi",  "vo",  "vot", "vun",
173    "wa",  "wae", "wak", "wal", "war", "was", "wen", "wo",
174    "xal", "xh",  "xog",
175    "yao", "yap", "yav", "ybb", "yi",  "yo",  "ypk", "yue",
176    "za",  "zap", "zbl", "zen", "zgh", "zh",  "znd", "zu",
177    "zun", "zxx", "zza",
178NULL,
179    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
180NULL
181};
182
/*
 * Deprecated language codes and their replacements.
 *
 * The two arrays are parallel by position: DEPRECATED_LANGUAGES[i] pairs with
 * REPLACEMENT_LANGUAGES[i] (in->id, iw->he, ji->yi, jw->jv), so they must be
 * edited together.
 * NOTE(review): the double NULL terminator appears to mirror the two-list
 * layout used by LANGUAGES -- confirm against the lookup helper that scans
 * these arrays.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
189
190/**
191 * Table of 3-letter language codes.
192 *
193 * This is a lookup table used to convert 3-letter language codes to
194 * their 2-letter equivalent, where possible.  It must be kept in sync
195 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
196 * same language as LANGUAGES_3[i].  The commented-out lines are
197 * copied from LANGUAGES to make eyeballing this baby easier.
198 *
199 * Where a 3-letter language code has no 2-letter equivalent, the
200 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
201 *
202 * This table should be terminated with a NULL entry, followed by a
203 * second list, and another NULL entry.  The two lists correspond to
204 * the two lists in LANGUAGES.
205 */
206/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
207/* ISO639 table version is 20130531 */
208static const char * const LANGUAGES_3[] = {
209    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr",
210    "afa", "afh", "agq", "ain", "aka", "akk", "ale", "alg",
211    "alt", "amh", "arg", "ang", "anp", "apa", "ara", "arc",
212    "arn", "arp", "art", "arw", "asm", "asa", "ast", "ath",
213    "aus", "ava", "awa", "aym", "aze",
214    "bak", "bad", "bai", "bal", "ban", "bas", "bat", "bax",
215    "bbj", "bel", "bej", "bem", "ber", "bez", "bfd", "bul",
216    "bih", "bho", "bis", "bik", "bin", "bkm", "bla", "bam",
217    "ben", "bnt", "bod", "bre", "bra", "brx", "bos", "bss",
218    "btk", "bua", "bug", "bum", "byn", "byv",
219    "cat", "cad", "cai", "car", "cau", "cay", "cch", "che",
220    "ceb", "cel", "cgg", "cha", "chb", "chg", "chk", "chm",
221    "chn", "cho", "chp", "chr", "chy", "ckb", "cmc", "cos",
222    "cop", "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces",
223    "csb", "chu", "cus", "chv", "cym",
224    "dan", "dak", "dar", "dav", "day", "deu", "del", "den",
225    "dgr", "din", "dje", "doi", "dra", "dsb", "dua", "dum",
226    "div", "dyo", "dyu", "dzo", "dzg",
227    "ebu", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
228    "enm", "epo", "spa", "est", "eus", "ewo",
229    "fas", "fan", "fat", "ful", "fin", "fil", "fiu", "fij",
230    "fao", "fon", "fra", "frm", "fro", "frr", "frs", "fur",
231    "fry",
232    "gle", "gaa", "gay", "gba", "gla", "gem", "gez", "gil",
233    "glg", "gmh", "grn", "goh", "gon", "gor", "got", "grb",
234    "grc", "gsw", "guj", "guz", "glv", "gwi",
235    "hau", "hai", "haw", "heb", "hin", "hil", "him", "hit",
236    "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye",
237    "her",
238    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ijo",
239    "ipk", "ilo", "inc", "ine", "inh", "ido", "ira", "iro",
240    "isl", "ita", "iku",
241    "jpn", "jbo", "jgo", "jmc", "jpr", "jrb", "jav",
242    "kat", "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw",
243    "kbd", "kbl", "kcg", "kde", "kea", "kfo", "kon", "kha",
244    "khi", "kho", "khq", "kik", "kua", "kaz", "kkj", "kal",
245    "kln", "khm", "kmb", "kan", "kor", "kok", "kos", "kpe",
246    "kau", "krc", "krl", "kro", "kru", "kas", "ksb", "ksf",
247    "ksh", "kur", "kum", "kut", "kom", "cor", "kir",
248    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lug",
249    "lim", "lkt", "lin", "lao", "lol", "loz", "lit", "lub",
250    "lua", "lui", "lun", "luo", "lus", "luy", "lav",
251    "mad", "maf", "mag", "mai", "mak", "man", "map", "mas",
252    "mde", "mdf", "mdr", "men", "mer", "mfe", "mlg", "mga",
253    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
254    "mkh", "mal", "mon", "mnc", "mni", "mno", "mol", "moh",
255    "mos", "mar", "msa", "mlt", "mua", "mul", "mun", "mus",
256    "mwl", "mwr", "mya", "mye", "myn", "myv",
257    "nau", "nah", "nai", "nap", "naq", "nob", "nde", "nds",
258    "nep", "new", "ndo", "nia", "nic", "niu", "nld", "nmg",
259    "nno", "nnh", "nor", "nog", "non", "nqo", "nbl", "nso",
260    "nub", "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo",
261    "nzi",
262    "oci", "oji", "orm", "ori", "oss", "osa", "ota", "oto",
263    "pan", "paa", "pag", "pal", "pam", "pap", "pau", "peo",
264    "phi", "phn", "pli", "pol", "pon", "pra", "pro", "pus",
265    "por",
266    "que",
267    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rof",
268    "rom", "rus", "rup", "kin", "rwk",
269    "san", "sad", "sah", "sai", "sal", "sam", "saq", "sas",
270    "sat", "sba", "sbp", "srd", "scn", "sco", "snd", "sme",
271    "see", "seh", "sel", "sem", "ses", "sag", "sga", "sgn",
272    "shi", "shn", "shu", "sin", "sid", "sio", "sit",
273    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
274    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
275    "srn", "srr", "ssw", "ssa", "ssy", "sot", "sun", "suk",
276    "sus", "sux", "swe", "swa", "swb", "swc", "syc", "syr",
277    "tam", "tai", "tel", "tem", "teo", "ter", "tet", "tgk",
278    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tgl", "tlh",
279    "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
280    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
281    "twq", "tah", "tyv", "tzm",
282    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
283    "vai", "ven", "vie", "vol", "vot", "vun",
284    "wln", "wae", "wak", "wal", "war", "was", "wen", "wol",
285    "xal", "xho", "xog",
286    "yao", "yap", "yav", "ybb", "yid", "yor", "ypk", "yue",
287    "zha", "zap", "zbl", "zen", "zgh", "zho", "znd", "zul",
288    "zun", "zxx", "zza",
289NULL,
290/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
291    "ind", "heb", "yid", "jaw", "srp",
292NULL
293};
294
295/**
296 * Table of 2-letter country codes.
297 *
298 * This list must be in sorted order.  This list is returned directly
299 * to the user by some API.
300 *
301 * This list must be kept in sync with COUNTRIES_3, with corresponding
302 * entries matched.
303 *
304 * This table should be terminated with a NULL entry, followed by a
305 * second list, and another NULL entry.  The first list is visible to
306 * user code when this array is returned by API.  The second list
307 * contains codes we support, but do not expose through user API.
308 *
309 * Notes:
310 *
311 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
312 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
313 * new codes keeping the old ones for compatibility updated to include
314 * 1999/12/03 revisions *CWB*
315 *
316 * RO(ROM) is now RO(ROU) according to
317 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
318 */
319static const char * const COUNTRIES[] = {
320    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
321    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
322    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
323    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
324    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
325    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
326    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
327    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
328    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
329    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
330    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
331    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
332    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
333    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
334    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
335    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
336    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
337    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
338    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
339    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
340    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
341    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
342    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
343    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
344    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
345    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
346    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
347    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
348    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
349    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
350NULL,
351    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
352NULL
353};
354
/*
 * Deprecated 2-letter country codes and their replacements.
 *
 * The two arrays are parallel by position: DEPRECATED_COUNTRIES[i] pairs with
 * REPLACEMENT_COUNTRIES[i]; the commented-out row inside REPLACEMENT_COUNTRIES
 * repeats the deprecated codes so the pairing can be checked by eye.
 * Both arrays must be edited together.
 * NOTE(review): the double NULL terminator appears to mirror the two-list
 * layout used by COUNTRIES -- confirm against the lookup helper that scans
 * these arrays.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
362
363/**
364 * Table of 3-letter country codes.
365 *
366 * This is a lookup table used to convert 3-letter country codes to
367 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
368 * For all valid i, COUNTRIES[i] must refer to the same country as
369 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
370 * to make eyeballing this baby easier.
371 *
372 * This table should be terminated with a NULL entry, followed by a
373 * second list, and another NULL entry.  The two lists correspond to
374 * the two lists in COUNTRIES.
375 */
376static const char * const COUNTRIES_3[] = {
377/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
378    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
379/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
380    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
381/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
382    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
383/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
384    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
385/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
386    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
387/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
388    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
389/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
390    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
391/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
392    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
393/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
394    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
395/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
396    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
397/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
398    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
399/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
400    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
401/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
402    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
403/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
404    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
405/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
406    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
407/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
408    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
409/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
410    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
411/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
412    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
413/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
414    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
415/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
416    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
417/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
418    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
419/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
420    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
421/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
422    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
423/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
424    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
425/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
426    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
427/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
428    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
429/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
430    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
431/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
432    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
433/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
434    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
435/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
436    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
437NULL,
438/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
439    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
440NULL
441};
442
/**
 * One row of a locale-ID canonicalization table: maps a legacy or
 * non-standard input ID to its canonical ID, optionally together with a
 * single keyword/value pair that belongs to the canonical form.
 * keyword and value are either both set or both NULL.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} CanonicalizationMap;
449
/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Each row is { input ID, canonical ID, keyword, keyword value }.  The kinds
 * of rows present in the table are:
 *  - POSIX / .NET / Linux / registered names that map to a plain canonical
 *    ID (keyword and value are NULL);
 *  - "_PREEURO" variants, which map to the base locale plus a "currency"
 *    keyword carrying the pre-euro currency code;
 *  - old ICU variant names, which map to the base locale plus a "collation"
 *    or "calendar" keyword.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
};
502
/**
 * One row of a variant-to-keyword table: associates a locale variant name
 * with the keyword/value pair that expresses the same meaning.
 */
typedef struct VariantMap {
    const char *variant;          /* input ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if kw==NULL */
} VariantMap;
508
/**
 * Variant names that have a keyword equivalent.
 * Each row is { variant, keyword, keyword value }.
 */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency", "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
514
515/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has a BCP47 extension and does not have '@'.
 *
 * Evaluates to true when id is non-NULL, contains no '@' (i.e. no keyword
 * section), and getShortestSubtagLength(id) reports a one-character subtag.
 *
 * The macro now operates on its own argument throughout; it previously
 * passed a hard-coded identifier `localeID` to getShortestSubtagLength, which
 * only worked when the caller's variable happened to have that name.
 * Note: id is evaluated more than once, so pass an expression without side
 * effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * finalID receives buffer when uloc_forLanguageTag() returns a positive
 * length without setting a failure code in *err; otherwise finalID receives
 * the original id.
 *
 * All macro parameters are parenthesized so that arbitrary expressions can be
 * passed safely.  The expansion is a bare if/else statement: use it only as a
 * complete statement inside a braced block, never as the unbraced body of
 * another if.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || U_FAILURE(*(err))) { \
            (finalID)=(id); \
        } else { \
            (finalID)=(buffer); \
        }
525/* Gets the size of the shortest subtag in the given localeID. */
526static int32_t getShortestSubtagLength(const char *localeID) {
527    int32_t localeIDLength = uprv_strlen(localeID);
528    int32_t length = localeIDLength;
529    int32_t tmpLength = 0;
530    int32_t i;
531    UBool reset = TRUE;
532
533    for (i = 0; i < localeIDLength; i++) {
534        if (localeID[i] != '_' && localeID[i] != '-') {
535            if (reset) {
536                tmpLength = 0;
537                reset = FALSE;
538            }
539            tmpLength++;
540        } else {
541            if (tmpLength != 0 && tmpLength < length) {
542                length = tmpLength;
543            }
544            reset = TRUE;
545        }
546    }
547
548    return length;
549}
550
551/* ### Keywords **************************************************/
552
/* Size in bytes of a canonicalized keyword-name buffer, including the
   terminating NUL; keyword names of this length or longer are rejected. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keyword=value pairs accepted from a single locale ID;
   this is the capacity of the keyword list built while parsing. */
#define ULOC_MAX_NO_KEYWORDS 25
555
556U_CAPI const char * U_EXPORT2
557locale_getKeywordsStart(const char *localeID) {
558    const char *result = NULL;
559    if((result = uprv_strchr(localeID, '@')) != NULL) {
560        return result;
561    }
562#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
563    else {
564        /* We do this because the @ sign is variant, and the @ sign used on one
565        EBCDIC machine won't be compiled the same way on other EBCDIC based
566        machines. */
567        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
568        const uint8_t *charToFind = ebcdicSigns;
569        while(*charToFind) {
570            if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
571                return result;
572            }
573            charToFind++;
574        }
575    }
576#endif
577    return NULL;
578}
579
580/**
581 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
582 * @param keywordName incoming name to be canonicalized
583 * @param status return status (keyword too long)
584 * @return length of the keyword name
585 */
586static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
587{
588  int32_t i;
589  int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
590
591  if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
592    /* keyword name too long for internal buffer */
593    *status = U_INTERNAL_PROGRAM_ERROR;
594          return 0;
595  }
596
597  /* normalize the keyword name */
598  for(i = 0; i < keywordNameLen; i++) {
599    buf[i] = uprv_tolower(keywordName[i]);
600  }
601  buf[i] = 0;
602
603  return keywordNameLen;
604}
605
/* One parsed keyword=value pair from the keyword section of a locale ID. */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased keyword name with spaces removed, NUL-terminated */
    int32_t keywordLen;                    /* length of keyword, excluding the terminator */
    const char *valueStart;                /* points at the value text (after '=' and leading spaces); not NUL-terminated */
    int32_t valueLen;                      /* length of the value with trailing spaces excluded */
} KeywordStruct;
612
613static int32_t U_CALLCONV
614compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
615    const char* leftString = ((const KeywordStruct *)left)->keyword;
616    const char* rightString = ((const KeywordStruct *)right)->keyword;
617    return uprv_strcmp(leftString, rightString);
618}
619
620/**
621 * Both addKeyword and addValue must already be in canonical form.
622 * Either both addKeyword and addValue are NULL, or neither is NULL.
623 * If they are not NULL they must be zero terminated.
 * If addKeyword is not NULL it must have length small enough to fit in KeywordStruct.keyword.
625 */
626static int32_t
627_getKeywords(const char *localeID,
628             char prev,
629             char *keywords, int32_t keywordCapacity,
630             char *values, int32_t valuesCapacity, int32_t *valLen,
631             UBool valuesToo,
632             const char* addKeyword,
633             const char* addValue,
634             UErrorCode *status)
635{
636    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
637
638    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
639    int32_t numKeywords = 0;
640    const char* pos = localeID;
641    const char* equalSign = NULL;
642    const char* semicolon = NULL;
643    int32_t i = 0, j, n;
644    int32_t keywordsLen = 0;
645    int32_t valuesLen = 0;
646
647    if(prev == '@') { /* start of keyword definition */
648        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
649        do {
650            UBool duplicate = FALSE;
651            /* skip leading spaces */
652            while(*pos == ' ') {
653                pos++;
654            }
655            if (!*pos) { /* handle trailing "; " */
656                break;
657            }
658            if(numKeywords == maxKeywords) {
659                *status = U_INTERNAL_PROGRAM_ERROR;
660                return 0;
661            }
662            equalSign = uprv_strchr(pos, '=');
663            semicolon = uprv_strchr(pos, ';');
664            /* lack of '=' [foo@currency] is illegal */
665            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
666            if(!equalSign || (semicolon && semicolon<equalSign)) {
667                *status = U_INVALID_FORMAT_ERROR;
668                return 0;
669            }
670            /* need to normalize both keyword and keyword name */
671            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
672                /* keyword name too long for internal buffer */
673                *status = U_INTERNAL_PROGRAM_ERROR;
674                return 0;
675            }
676            for(i = 0, n = 0; i < equalSign - pos; ++i) {
677                if (pos[i] != ' ') {
678                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
679                }
680            }
681
682            /* zero-length keyword is an error. */
683            if (n == 0) {
684                *status = U_INVALID_FORMAT_ERROR;
685                return 0;
686            }
687
688            keywordList[numKeywords].keyword[n] = 0;
689            keywordList[numKeywords].keywordLen = n;
690            /* now grab the value part. First we skip the '=' */
691            equalSign++;
692            /* then we leading spaces */
693            while(*equalSign == ' ') {
694                equalSign++;
695            }
696
697            /* Premature end or zero-length value */
698            if (!equalSign || equalSign == semicolon) {
699                *status = U_INVALID_FORMAT_ERROR;
700                return 0;
701            }
702
703            keywordList[numKeywords].valueStart = equalSign;
704
705            pos = semicolon;
706            i = 0;
707            if(pos) {
708                while(*(pos - i - 1) == ' ') {
709                    i++;
710                }
711                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
712                pos++;
713            } else {
714                i = (int32_t)uprv_strlen(equalSign);
715                while(i && equalSign[i-1] == ' ') {
716                    i--;
717                }
718                keywordList[numKeywords].valueLen = i;
719            }
720            /* If this is a duplicate keyword, then ignore it */
721            for (j=0; j<numKeywords; ++j) {
722                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
723                    duplicate = TRUE;
724                    break;
725                }
726            }
727            if (!duplicate) {
728                ++numKeywords;
729            }
730        } while(pos);
731
732        /* Handle addKeyword/addValue. */
733        if (addKeyword != NULL) {
734            UBool duplicate = FALSE;
735            U_ASSERT(addValue != NULL);
736            /* Search for duplicate; if found, do nothing. Explicit keyword
737               overrides addKeyword. */
738            for (j=0; j<numKeywords; ++j) {
739                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
740                    duplicate = TRUE;
741                    break;
742                }
743            }
744            if (!duplicate) {
745                if (numKeywords == maxKeywords) {
746                    *status = U_INTERNAL_PROGRAM_ERROR;
747                    return 0;
748                }
749                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
750                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
751                keywordList[numKeywords].valueStart = addValue;
752                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
753                ++numKeywords;
754            }
755        } else {
756            U_ASSERT(addValue == NULL);
757        }
758
759        /* now we have a list of keywords */
760        /* we need to sort it */
761        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
762
763        /* Now construct the keyword part */
764        for(i = 0; i < numKeywords; i++) {
765            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
766                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
767                if(valuesToo) {
768                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
769                } else {
770                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
771                }
772            }
773            keywordsLen += keywordList[i].keywordLen + 1;
774            if(valuesToo) {
775                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
776                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
777                }
778                keywordsLen += keywordList[i].valueLen;
779
780                if(i < numKeywords - 1) {
781                    if(keywordsLen < keywordCapacity) {
782                        keywords[keywordsLen] = ';';
783                    }
784                    keywordsLen++;
785                }
786            }
787            if(values) {
788                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
789                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
790                    values[valuesLen + keywordList[i].valueLen] = 0;
791                }
792                valuesLen += keywordList[i].valueLen + 1;
793            }
794        }
795        if(values) {
796            values[valuesLen] = 0;
797            if(valLen) {
798                *valLen = valuesLen;
799            }
800        }
801        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
802    } else {
803        return 0;
804    }
805}
806
807U_CFUNC int32_t
808locale_getKeywords(const char *localeID,
809                   char prev,
810                   char *keywords, int32_t keywordCapacity,
811                   char *values, int32_t valuesCapacity, int32_t *valLen,
812                   UBool valuesToo,
813                   UErrorCode *status) {
814    return _getKeywords(localeID, prev, keywords, keywordCapacity,
815                        values, valuesCapacity, valLen, valuesToo,
816                        NULL, NULL, status);
817}
818
819U_CAPI int32_t U_EXPORT2
820uloc_getKeywordValue(const char* localeID,
821                     const char* keywordName,
822                     char* buffer, int32_t bufferCapacity,
823                     UErrorCode* status)
824{
825    const char* startSearchHere = NULL;
826    const char* nextSeparator = NULL;
827    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
828    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
829    int32_t i = 0;
830    int32_t result = 0;
831
832    if(status && U_SUCCESS(*status) && localeID) {
833      char tempBuffer[ULOC_FULLNAME_CAPACITY];
834      const char* tmpLocaleID;
835
836      if (_hasBCP47Extension(localeID)) {
837          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
838      } else {
839          tmpLocaleID=localeID;
840      }
841
842      startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
843      if(startSearchHere == NULL) {
844          /* no keywords, return at once */
845          return 0;
846      }
847
848      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
849      if(U_FAILURE(*status)) {
850        return 0;
851      }
852
853      /* find the first keyword */
854      while(startSearchHere) {
855          startSearchHere++;
856          /* skip leading spaces (allowed?) */
857          while(*startSearchHere == ' ') {
858              startSearchHere++;
859          }
860          nextSeparator = uprv_strchr(startSearchHere, '=');
861          /* need to normalize both keyword and keyword name */
862          if(!nextSeparator) {
863              break;
864          }
865          if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
866              /* keyword name too long for internal buffer */
867              *status = U_INTERNAL_PROGRAM_ERROR;
868              return 0;
869          }
870          for(i = 0; i < nextSeparator - startSearchHere; i++) {
871              localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
872          }
873          /* trim trailing spaces */
874          while(startSearchHere[i-1] == ' ') {
875              i--;
876              U_ASSERT(i>=0);
877          }
878          localeKeywordNameBuffer[i] = 0;
879
880          startSearchHere = uprv_strchr(nextSeparator, ';');
881
882          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
883              nextSeparator++;
884              while(*nextSeparator == ' ') {
885                  nextSeparator++;
886              }
887              /* we actually found the keyword. Copy the value */
888              if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
889                  while(*(startSearchHere-1) == ' ') {
890                      startSearchHere--;
891                  }
892                  uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
893                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
894              } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
895                  i = (int32_t)uprv_strlen(nextSeparator);
896                  while(nextSeparator[i - 1] == ' ') {
897                      i--;
898                  }
899                  uprv_strncpy(buffer, nextSeparator, i);
900                  result = u_terminateChars(buffer, bufferCapacity, i, status);
901              } else {
902                  /* give a bigger buffer, please */
903                  *status = U_BUFFER_OVERFLOW_ERROR;
904                  if(startSearchHere) {
905                      result = (int32_t)(startSearchHere - nextSeparator);
906                  } else {
907                      result = (int32_t)uprv_strlen(nextSeparator);
908                  }
909              }
910              return result;
911          }
912      }
913    }
914    return 0;
915}
916
917U_CAPI int32_t U_EXPORT2
918uloc_setKeywordValue(const char* keywordName,
919                     const char* keywordValue,
920                     char* buffer, int32_t bufferCapacity,
921                     UErrorCode* status)
922{
923    /* TODO: sorting. removal. */
924    int32_t keywordNameLen;
925    int32_t keywordValueLen;
926    int32_t bufLen;
927    int32_t needLen = 0;
928    int32_t foundValueLen;
929    int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
930    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
931    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
932    int32_t i = 0;
933    int32_t rc;
934    char* nextSeparator = NULL;
935    char* nextEqualsign = NULL;
936    char* startSearchHere = NULL;
937    char* keywordStart = NULL;
938    char *insertHere = NULL;
939    if(U_FAILURE(*status)) {
940        return -1;
941    }
942    if(bufferCapacity>1) {
943        bufLen = (int32_t)uprv_strlen(buffer);
944    } else {
945        *status = U_ILLEGAL_ARGUMENT_ERROR;
946        return 0;
947    }
948    if(bufferCapacity<bufLen) {
949        /* The capacity is less than the length?! Is this NULL terminated? */
950        *status = U_ILLEGAL_ARGUMENT_ERROR;
951        return 0;
952    }
953    if(keywordValue && !*keywordValue) {
954        keywordValue = NULL;
955    }
956    if(keywordValue) {
957        keywordValueLen = (int32_t)uprv_strlen(keywordValue);
958    } else {
959        keywordValueLen = 0;
960    }
961    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
962    if(U_FAILURE(*status)) {
963        return 0;
964    }
965    startSearchHere = (char*)locale_getKeywordsStart(buffer);
966    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
967        if(!keywordValue) { /* no keywords = nothing to remove */
968            return bufLen;
969        }
970
971        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
972        if(startSearchHere) { /* had a single @ */
973            needLen--; /* already had the @ */
974            /* startSearchHere points at the @ */
975        } else {
976            startSearchHere=buffer+bufLen;
977        }
978        if(needLen >= bufferCapacity) {
979            *status = U_BUFFER_OVERFLOW_ERROR;
980            return needLen; /* no change */
981        }
982        *startSearchHere = '@';
983        startSearchHere++;
984        uprv_strcpy(startSearchHere, keywordNameBuffer);
985        startSearchHere += keywordNameLen;
986        *startSearchHere = '=';
987        startSearchHere++;
988        uprv_strcpy(startSearchHere, keywordValue);
989        startSearchHere+=keywordValueLen;
990        return needLen;
991    } /* end shortcut - no @ */
992
993    keywordStart = startSearchHere;
994    /* search for keyword */
995    while(keywordStart) {
996        keywordStart++;
997        /* skip leading spaces (allowed?) */
998        while(*keywordStart == ' ') {
999            keywordStart++;
1000        }
1001        nextEqualsign = uprv_strchr(keywordStart, '=');
1002        /* need to normalize both keyword and keyword name */
1003        if(!nextEqualsign) {
1004            break;
1005        }
1006        if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
1007            /* keyword name too long for internal buffer */
1008            *status = U_INTERNAL_PROGRAM_ERROR;
1009            return 0;
1010        }
1011        for(i = 0; i < nextEqualsign - keywordStart; i++) {
1012            localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
1013        }
1014        /* trim trailing spaces */
1015        while(keywordStart[i-1] == ' ') {
1016            i--;
1017        }
1018        U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN);
1019        localeKeywordNameBuffer[i] = 0;
1020
1021        nextSeparator = uprv_strchr(nextEqualsign, ';');
1022        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1023        if(rc == 0) {
1024            nextEqualsign++;
1025            while(*nextEqualsign == ' ') {
1026                nextEqualsign++;
1027            }
1028            /* we actually found the keyword. Change the value */
1029            if (nextSeparator) {
1030                keywordAtEnd = 0;
1031                foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1032            } else {
1033                keywordAtEnd = 1;
1034                foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1035            }
1036            if(keywordValue) { /* adding a value - not removing */
1037              if(foundValueLen == keywordValueLen) {
1038                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1039                return bufLen; /* no change in size */
1040              } else if(foundValueLen > keywordValueLen) {
1041                int32_t delta = foundValueLen - keywordValueLen;
1042                if(nextSeparator) { /* RH side */
1043                  uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1044                }
1045                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1046                bufLen -= delta;
1047                buffer[bufLen]=0;
1048                return bufLen;
1049              } else { /* FVL < KVL */
1050                int32_t delta = keywordValueLen - foundValueLen;
1051                if((bufLen+delta) >= bufferCapacity) {
1052                  *status = U_BUFFER_OVERFLOW_ERROR;
1053                  return bufLen+delta;
1054                }
1055                if(nextSeparator) { /* RH side */
1056                  uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1057                }
1058                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1059                bufLen += delta;
1060                buffer[bufLen]=0;
1061                return bufLen;
1062              }
1063            } else { /* removing a keyword */
1064              if(keywordAtEnd) {
1065                /* zero out the ';' or '@' just before startSearchhere */
1066                keywordStart[-1] = 0;
1067                return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1068              } else {
1069                uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1070                keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1071                return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1072              }
1073            }
1074        } else if(rc<0){ /* end match keyword */
1075          /* could insert at this location. */
1076          insertHere = keywordStart;
1077        }
1078        keywordStart = nextSeparator;
1079    } /* end loop searching */
1080
1081    if(!keywordValue) {
1082      return bufLen; /* removal of non-extant keyword - no change */
1083    }
1084
1085    /* we know there is at least one keyword. */
1086    needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1087    if(needLen >= bufferCapacity) {
1088        *status = U_BUFFER_OVERFLOW_ERROR;
1089        return needLen; /* no change */
1090    }
1091
1092    if(insertHere) {
1093      uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1094      keywordStart = insertHere;
1095    } else {
1096      keywordStart = buffer+bufLen;
1097      *keywordStart = ';';
1098      keywordStart++;
1099    }
1100    uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1101    keywordStart += keywordNameLen;
1102    *keywordStart = '=';
1103    keywordStart++;
1104    uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1105    keywordStart+=keywordValueLen;
1106    if(insertHere) {
1107      *keywordStart = ';';
1108      keywordStart++;
1109    }
1110    buffer[needLen]=0;
1111    return needLen;
1112}
1113
1114/* ### ID parsing implementation **************************************************/
1115
/* TRUE if 'a' is one of the letters that can start a special prefix.
 * The argument is parenthesized so that expression arguments parse as
 * intended; note that it is still evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1126
1127static char* _strnchr(const char* str, int32_t len, char c) {
1128    U_ASSERT(str != 0 && len >= 0);
1129    while (len-- != 0) {
1130        char d = *str;
1131        if (d == c) {
1132            return (char*) str;
1133        } else if (d == 0) {
1134            break;
1135        }
1136        ++str;
1137    }
1138    return NULL;
1139}
1140
1141/**
1142 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1143 * a NULL entry, followed by more entries, and a second NULL entry.
1144 *
1145 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1146 * COUNTRIES_3.
1147 */
1148static int16_t _findIndex(const char* const* list, const char* key)
1149{
1150    const char* const* anchor = list;
1151    int32_t pass = 0;
1152
1153    /* Make two passes through two NULL-terminated arrays at 'list' */
1154    while (pass++ < 2) {
1155        while (*list) {
1156            if (uprv_strcmp(key, *list) == 0) {
1157                return (int16_t)(list - anchor);
1158            }
1159            list++;
1160        }
1161        ++list;     /* skip final NULL *CWB*/
1162    }
1163    return -1;
1164}
1165
1166/* count the length of src while copying it to dest; return strlen(src) */
1167static inline int32_t
1168_copyCount(char *dest, int32_t destCapacity, const char *src) {
1169    const char *anchor;
1170    char c;
1171
1172    anchor=src;
1173    for(;;) {
1174        if((c=*src)==0) {
1175            return (int32_t)(src-anchor);
1176        }
1177        if(destCapacity<=0) {
1178            return (int32_t)((src-anchor)+uprv_strlen(src));
1179        }
1180        ++src;
1181        *dest++=c;
1182        --destCapacity;
1183    }
1184}
1185
1186U_CFUNC const char*
1187uloc_getCurrentCountryID(const char* oldID){
1188    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1189    if (offset >= 0) {
1190        return REPLACEMENT_COUNTRIES[offset];
1191    }
1192    return oldID;
1193}
1194U_CFUNC const char*
1195uloc_getCurrentLanguageID(const char* oldID){
1196    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1197    if (offset >= 0) {
1198        return REPLACEMENT_LANGUAGES[offset];
1199    }
1200    return oldID;
1201}
1202/*
1203 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1204 * avoid duplicating code to handle the earlier locale ID pieces
1205 * in the functions for the later ones by
1206 * setting the *pEnd pointer to where they stopped parsing
1207 *
1208 * TODO try to use this in Locale
1209 */
1210U_CFUNC int32_t
1211ulocimp_getLanguage(const char *localeID,
1212                    char *language, int32_t languageCapacity,
1213                    const char **pEnd) {
1214    int32_t i=0;
1215    int32_t offset;
1216    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1217
1218    /* if it starts with i- or x- then copy that prefix */
1219    if(_isIDPrefix(localeID)) {
1220        if(i<languageCapacity) {
1221            language[i]=(char)uprv_tolower(*localeID);
1222        }
1223        if(i<languageCapacity) {
1224            language[i+1]='-';
1225        }
1226        i+=2;
1227        localeID+=2;
1228    }
1229
1230    /* copy the language as far as possible and count its length */
1231    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1232        if(i<languageCapacity) {
1233            language[i]=(char)uprv_tolower(*localeID);
1234        }
1235        if(i<3) {
1236            U_ASSERT(i>=0);
1237            lang[i]=(char)uprv_tolower(*localeID);
1238        }
1239        i++;
1240        localeID++;
1241    }
1242
1243    if(i==3) {
1244        /* convert 3 character code to 2 character code if possible *CWB*/
1245        offset=_findIndex(LANGUAGES_3, lang);
1246        if(offset>=0) {
1247            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1248        }
1249    }
1250
1251    if(pEnd!=NULL) {
1252        *pEnd=localeID;
1253    }
1254    return i;
1255}
1256
1257U_CFUNC int32_t
1258ulocimp_getScript(const char *localeID,
1259                  char *script, int32_t scriptCapacity,
1260                  const char **pEnd)
1261{
1262    int32_t idLen = 0;
1263
1264    if (pEnd != NULL) {
1265        *pEnd = localeID;
1266    }
1267
1268    /* copy the second item as far as possible and count its length */
1269    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1270            && uprv_isASCIILetter(localeID[idLen])) {
1271        idLen++;
1272    }
1273
1274    /* If it's exactly 4 characters long, then it's a script and not a country. */
1275    if (idLen == 4) {
1276        int32_t i;
1277        if (pEnd != NULL) {
1278            *pEnd = localeID+idLen;
1279        }
1280        if(idLen > scriptCapacity) {
1281            idLen = scriptCapacity;
1282        }
1283        if (idLen >= 1) {
1284            script[0]=(char)uprv_toupper(*(localeID++));
1285        }
1286        for (i = 1; i < idLen; i++) {
1287            script[i]=(char)uprv_tolower(*(localeID++));
1288        }
1289    }
1290    else {
1291        idLen = 0;
1292    }
1293    return idLen;
1294}
1295
1296U_CFUNC int32_t
1297ulocimp_getCountry(const char *localeID,
1298                   char *country, int32_t countryCapacity,
1299                   const char **pEnd)
1300{
1301    int32_t idLen=0;
1302    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1303    int32_t offset;
1304
1305    /* copy the country as far as possible and count its length */
1306    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1307        if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1308            cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1309        }
1310        idLen++;
1311    }
1312
1313    /* the country should be either length 2 or 3 */
1314    if (idLen == 2 || idLen == 3) {
1315        UBool gotCountry = FALSE;
1316        /* convert 3 character code to 2 character code if possible *CWB*/
1317        if(idLen==3) {
1318            offset=_findIndex(COUNTRIES_3, cnty);
1319            if(offset>=0) {
1320                idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1321                gotCountry = TRUE;
1322            }
1323        }
1324        if (!gotCountry) {
1325            int32_t i = 0;
1326            for (i = 0; i < idLen; i++) {
1327                if (i < countryCapacity) {
1328                    country[i]=(char)uprv_toupper(localeID[i]);
1329                }
1330            }
1331        }
1332        localeID+=idLen;
1333    } else {
1334        idLen = 0;
1335    }
1336
1337    if(pEnd!=NULL) {
1338        *pEnd=localeID;
1339    }
1340
1341    return idLen;
1342}
1343
1344/**
1345 * @param needSeparator if true, then add leading '_' if any variants
1346 * are added to 'variant'
1347 */
1348static int32_t
1349_getVariantEx(const char *localeID,
1350              char prev,
1351              char *variant, int32_t variantCapacity,
1352              UBool needSeparator) {
1353    int32_t i=0;
1354
1355    /* get one or more variant tags and separate them with '_' */
1356    if(_isIDSeparator(prev)) {
1357        /* get a variant string after a '-' or '_' */
1358        while(!_isTerminator(*localeID)) {
1359            if (needSeparator) {
1360                if (i<variantCapacity) {
1361                    variant[i] = '_';
1362                }
1363                ++i;
1364                needSeparator = FALSE;
1365            }
1366            if(i<variantCapacity) {
1367                variant[i]=(char)uprv_toupper(*localeID);
1368                if(variant[i]=='-') {
1369                    variant[i]='_';
1370                }
1371            }
1372            i++;
1373            localeID++;
1374        }
1375    }
1376
1377    /* if there is no variant tag after a '-' or '_' then look for '@' */
1378    if(i==0) {
1379        if(prev=='@') {
1380            /* keep localeID */
1381        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1382            ++localeID; /* point after the '@' */
1383        } else {
1384            return 0;
1385        }
1386        while(!_isTerminator(*localeID)) {
1387            if (needSeparator) {
1388                if (i<variantCapacity) {
1389                    variant[i] = '_';
1390                }
1391                ++i;
1392                needSeparator = FALSE;
1393            }
1394            if(i<variantCapacity) {
1395                variant[i]=(char)uprv_toupper(*localeID);
1396                if(variant[i]=='-' || variant[i]==',') {
1397                    variant[i]='_';
1398                }
1399            }
1400            i++;
1401            localeID++;
1402        }
1403    }
1404
1405    return i;
1406}
1407
1408static int32_t
1409_getVariant(const char *localeID,
1410            char prev,
1411            char *variant, int32_t variantCapacity) {
1412    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1413}
1414
1415/**
1416 * Delete ALL instances of a variant from the given list of one or
1417 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1418 * @param variants the source string of one or more variants,
1419 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1420 * terminated; if it is, trailing zero will NOT be maintained.
1421 * @param variantsLen length of variants
1422 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1423 * or "PREEURO"; not zero terminated
1424 * @param toDeleteLen length of toDelete
1425 * @return number of characters deleted from variants
1426 */
1427static int32_t
1428_deleteVariant(char* variants, int32_t variantsLen,
1429               const char* toDelete, int32_t toDeleteLen)
1430{
1431    int32_t delta = 0; /* number of chars deleted */
1432    for (;;) {
1433        UBool flag = FALSE;
1434        if (variantsLen < toDeleteLen) {
1435            return delta;
1436        }
1437        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1438            (variantsLen == toDeleteLen ||
1439             (flag=(variants[toDeleteLen] == '_'))))
1440        {
1441            int32_t d = toDeleteLen + (flag?1:0);
1442            variantsLen -= d;
1443            delta += d;
1444            if (variantsLen > 0) {
1445                uprv_memmove(variants, variants+d, variantsLen);
1446            }
1447        } else {
1448            char* p = _strnchr(variants, variantsLen, '_');
1449            if (p == NULL) {
1450                return delta;
1451            }
1452            ++p;
1453            variantsLen -= (int32_t)(p - variants);
1454            variants = p;
1455        }
1456    }
1457}
1458
1459/* Keyword enumeration */
1460
/*
 * Per-enumeration state stored in UEnumeration.context by
 * uloc_openKeywordList().
 */
typedef struct UKeywordsContext {
    /* Heap copy of the keyword list; released in uloc_kw_closeKeywords().
       The enumeration callbacks walk it as consecutive NUL-terminated
       strings and stop at an empty string. */
    char* keywords;
    /* Iteration cursor inside 'keywords': the next item to hand out. */
    char* current;
} UKeywordsContext;
1465
1466static void U_CALLCONV
1467uloc_kw_closeKeywords(UEnumeration *enumerator) {
1468    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1469    uprv_free(enumerator->context);
1470    uprv_free(enumerator);
1471}
1472
1473static int32_t U_CALLCONV
1474uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1475    char *kw = ((UKeywordsContext *)en->context)->keywords;
1476    int32_t result = 0;
1477    while(*kw) {
1478        result++;
1479        kw += uprv_strlen(kw)+1;
1480    }
1481    return result;
1482}
1483
1484static const char* U_CALLCONV
1485uloc_kw_nextKeyword(UEnumeration* en,
1486                    int32_t* resultLength,
1487                    UErrorCode* /*status*/) {
1488    const char* result = ((UKeywordsContext *)en->context)->current;
1489    int32_t len = 0;
1490    if(*result) {
1491        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1492        ((UKeywordsContext *)en->context)->current += len+1;
1493    } else {
1494        result = NULL;
1495    }
1496    if (resultLength) {
1497        *resultLength = len;
1498    }
1499    return result;
1500}
1501
1502static void U_CALLCONV
1503uloc_kw_resetKeywords(UEnumeration* en,
1504                      UErrorCode* /*status*/) {
1505    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1506}
1507
1508static const UEnumeration gKeywordsEnum = {
1509    NULL,
1510    NULL,
1511    uloc_kw_closeKeywords,
1512    uloc_kw_countKeywords,
1513    uenum_unextDefault,
1514    uloc_kw_nextKeyword,
1515    uloc_kw_resetKeywords
1516};
1517
1518U_CAPI UEnumeration* U_EXPORT2
1519uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1520{
1521    UKeywordsContext *myContext = NULL;
1522    UEnumeration *result = NULL;
1523
1524    if(U_FAILURE(*status)) {
1525        return NULL;
1526    }
1527    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1528    /* Null pointer test */
1529    if (result == NULL) {
1530        *status = U_MEMORY_ALLOCATION_ERROR;
1531        return NULL;
1532    }
1533    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1534    myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1535    if (myContext == NULL) {
1536        *status = U_MEMORY_ALLOCATION_ERROR;
1537        uprv_free(result);
1538        return NULL;
1539    }
1540    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1541    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1542    myContext->keywords[keywordListSize] = 0;
1543    myContext->current = myContext->keywords;
1544    result->context = myContext;
1545    return result;
1546}
1547
1548U_CAPI UEnumeration* U_EXPORT2
1549uloc_openKeywords(const char* localeID,
1550                        UErrorCode* status)
1551{
1552    int32_t i=0;
1553    char keywords[256];
1554    int32_t keywordsCapacity = 256;
1555    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1556    const char* tmpLocaleID;
1557
1558    if(status==NULL || U_FAILURE(*status)) {
1559        return 0;
1560    }
1561
1562    if (_hasBCP47Extension(localeID)) {
1563        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1564    } else {
1565        if (localeID==NULL) {
1566           localeID=uloc_getDefault();
1567        }
1568        tmpLocaleID=localeID;
1569    }
1570
1571    /* Skip the language */
1572    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1573    if(_isIDSeparator(*tmpLocaleID)) {
1574        const char *scriptID;
1575        /* Skip the script if available */
1576        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1577        if(scriptID != tmpLocaleID+1) {
1578            /* Found optional script */
1579            tmpLocaleID = scriptID;
1580        }
1581        /* Skip the Country */
1582        if (_isIDSeparator(*tmpLocaleID)) {
1583            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1584            if(_isIDSeparator(*tmpLocaleID)) {
1585                _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1586            }
1587        }
1588    }
1589
1590    /* keywords are located after '@' */
1591    if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1592        i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1593    }
1594
1595    if(i) {
1596        return uloc_openKeywordList(keywords, i, status);
1597    } else {
1598        return NULL;
1599    }
1600}
1601
1602
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* Nonzero when any bit of 'mask' is set in 'options'.  Both arguments are
   parenthesized so that compound expressions (e.g. a | b) expand correctly. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default" as a character array without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1611
1612/**
1613 * Canonicalize the given localeID, to level 1 or to level 2,
1614 * depending on the options.  To specify level 1, pass in options=0.
1615 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1616 *
1617 * This is the code underlying uloc_getName and uloc_canonicalize.
1618 */
1619static int32_t
1620_canonicalize(const char* localeID,
1621              char* result,
1622              int32_t resultCapacity,
1623              uint32_t options,
1624              UErrorCode* err) {
1625    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1626    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1627    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1628    const char* origLocaleID;
1629    const char* tmpLocaleID;
1630    const char* keywordAssign = NULL;
1631    const char* separatorIndicator = NULL;
1632    const char* addKeyword = NULL;
1633    const char* addValue = NULL;
1634    char* name;
1635    char* variant = NULL; /* pointer into name, or NULL */
1636
1637    if (U_FAILURE(*err)) {
1638        return 0;
1639    }
1640
1641    if (_hasBCP47Extension(localeID)) {
1642        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1643    } else {
1644        if (localeID==NULL) {
1645           localeID=uloc_getDefault();
1646        }
1647        tmpLocaleID=localeID;
1648    }
1649
1650    origLocaleID=tmpLocaleID;
1651
1652    /* if we are doing a full canonicalization, then put results in
1653       localeBuffer, if necessary; otherwise send them to result. */
1654    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1655        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1656        name = localeBuffer;
1657        nameCapacity = (int32_t)sizeof(localeBuffer);
1658    } else {
1659        name = result;
1660        nameCapacity = resultCapacity;
1661    }
1662
1663    /* get all pieces, one after another, and separate with '_' */
1664    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1665
1666    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1667        const char *d = uloc_getDefault();
1668
1669        len = (int32_t)uprv_strlen(d);
1670
1671        if (name != NULL) {
1672            uprv_strncpy(name, d, len);
1673        }
1674    } else if(_isIDSeparator(*tmpLocaleID)) {
1675        const char *scriptID;
1676
1677        ++fieldCount;
1678        if(len<nameCapacity) {
1679            name[len]='_';
1680        }
1681        ++len;
1682
1683        scriptSize=ulocimp_getScript(tmpLocaleID+1,
1684            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1685        if(scriptSize > 0) {
1686            /* Found optional script */
1687            tmpLocaleID = scriptID;
1688            ++fieldCount;
1689            len+=scriptSize;
1690            if (_isIDSeparator(*tmpLocaleID)) {
1691                /* If there is something else, then we add the _ */
1692                if(len<nameCapacity) {
1693                    name[len]='_';
1694                }
1695                ++len;
1696            }
1697        }
1698
1699        if (_isIDSeparator(*tmpLocaleID)) {
1700            const char *cntryID;
1701            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1702                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1703            if (cntrySize > 0) {
1704                /* Found optional country */
1705                tmpLocaleID = cntryID;
1706                len+=cntrySize;
1707            }
1708            if(_isIDSeparator(*tmpLocaleID)) {
1709                /* If there is something else, then we add the _  if we found country before. */
1710                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1711                    ++fieldCount;
1712                    if(len<nameCapacity) {
1713                        name[len]='_';
1714                    }
1715                    ++len;
1716                }
1717
1718                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1719                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1720                if (variantSize > 0) {
1721                    variant = len<nameCapacity ? name+len : NULL;
1722                    len += variantSize;
1723                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1724                }
1725            }
1726        }
1727    }
1728
1729    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1730    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1731        UBool done = FALSE;
1732        do {
1733            char c = *tmpLocaleID;
1734            switch (c) {
1735            case 0:
1736            case '@':
1737                done = TRUE;
1738                break;
1739            default:
1740                if (len<nameCapacity) {
1741                    name[len] = c;
1742                }
1743                ++len;
1744                ++tmpLocaleID;
1745                break;
1746            }
1747        } while (!done);
1748    }
1749
1750    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1751       After this, tmpLocaleID either points to '@' or is NULL */
1752    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1753        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1754        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1755    }
1756
1757    /* Copy POSIX-style variant, if any [mr@FOO] */
1758    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1759        tmpLocaleID != NULL && keywordAssign == NULL) {
1760        for (;;) {
1761            char c = *tmpLocaleID;
1762            if (c == 0) {
1763                break;
1764            }
1765            if (len<nameCapacity) {
1766                name[len] = c;
1767            }
1768            ++len;
1769            ++tmpLocaleID;
1770        }
1771    }
1772
1773    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1774        /* Handle @FOO variant if @ is present and not followed by = */
1775        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1776            int32_t posixVariantSize;
1777            /* Add missing '_' if needed */
1778            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1779                do {
1780                    if(len<nameCapacity) {
1781                        name[len]='_';
1782                    }
1783                    ++len;
1784                    ++fieldCount;
1785                } while(fieldCount<2);
1786            }
1787            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1788                                             (UBool)(variantSize > 0));
1789            if (posixVariantSize > 0) {
1790                if (variant == NULL) {
1791                    variant = name+len;
1792                }
1793                len += posixVariantSize;
1794                variantSize += posixVariantSize;
1795            }
1796        }
1797
1798        /* Handle generic variants first */
1799        if (variant) {
1800            for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1801                const char* variantToCompare = VARIANT_MAP[j].variant;
1802                int32_t n = (int32_t)uprv_strlen(variantToCompare);
1803                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1804                len -= variantLen;
1805                if (variantLen > 0) {
1806                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1807                        --len;
1808                    }
1809                    addKeyword = VARIANT_MAP[j].keyword;
1810                    addValue = VARIANT_MAP[j].value;
1811                    break;
1812                }
1813            }
1814            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1815                --len;
1816            }
1817        }
1818
1819        /* Look up the ID in the canonicalization map */
1820        for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1821            const char* id = CANONICALIZE_MAP[j].id;
1822            int32_t n = (int32_t)uprv_strlen(id);
1823            if (len == n && uprv_strncmp(name, id, n) == 0) {
1824                if (n == 0 && tmpLocaleID != NULL) {
1825                    break; /* Don't remap "" if keywords present */
1826                }
1827                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1828                if (CANONICALIZE_MAP[j].keyword) {
1829                    addKeyword = CANONICALIZE_MAP[j].keyword;
1830                    addValue = CANONICALIZE_MAP[j].value;
1831                }
1832                break;
1833            }
1834        }
1835    }
1836
1837    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1838        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1839            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1840            if(len<nameCapacity) {
1841                name[len]='@';
1842            }
1843            ++len;
1844            ++fieldCount;
1845            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1846                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1847        } else if (addKeyword != NULL) {
1848            U_ASSERT(addValue != NULL && len < nameCapacity);
1849            /* inelegant but works -- later make _getKeywords do this? */
1850            len += _copyCount(name+len, nameCapacity-len, "@");
1851            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1852            len += _copyCount(name+len, nameCapacity-len, "=");
1853            len += _copyCount(name+len, nameCapacity-len, addValue);
1854        }
1855    }
1856
1857    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1858        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1859    }
1860
1861    return u_terminateChars(result, resultCapacity, len, err);
1862}
1863
1864/* ### ID parsing API **************************************************/
1865
1866U_CAPI int32_t  U_EXPORT2
1867uloc_getParent(const char*    localeID,
1868               char* parent,
1869               int32_t parentCapacity,
1870               UErrorCode* err)
1871{
1872    const char *lastUnderscore;
1873    int32_t i;
1874
1875    if (U_FAILURE(*err))
1876        return 0;
1877
1878    if (localeID == NULL)
1879        localeID = uloc_getDefault();
1880
1881    lastUnderscore=uprv_strrchr(localeID, '_');
1882    if(lastUnderscore!=NULL) {
1883        i=(int32_t)(lastUnderscore-localeID);
1884    } else {
1885        i=0;
1886    }
1887
1888    if(i>0 && parent != localeID) {
1889        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1890    }
1891    return u_terminateChars(parent, parentCapacity, i, err);
1892}
1893
1894U_CAPI int32_t U_EXPORT2
1895uloc_getLanguage(const char*    localeID,
1896         char* language,
1897         int32_t languageCapacity,
1898         UErrorCode* err)
1899{
1900    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1901    int32_t i=0;
1902
1903    if (err==NULL || U_FAILURE(*err)) {
1904        return 0;
1905    }
1906
1907    if(localeID==NULL) {
1908        localeID=uloc_getDefault();
1909    }
1910
1911    i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1912    return u_terminateChars(language, languageCapacity, i, err);
1913}
1914
1915U_CAPI int32_t U_EXPORT2
1916uloc_getScript(const char*    localeID,
1917         char* script,
1918         int32_t scriptCapacity,
1919         UErrorCode* err)
1920{
1921    int32_t i=0;
1922
1923    if(err==NULL || U_FAILURE(*err)) {
1924        return 0;
1925    }
1926
1927    if(localeID==NULL) {
1928        localeID=uloc_getDefault();
1929    }
1930
1931    /* skip the language */
1932    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1933    if(_isIDSeparator(*localeID)) {
1934        i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
1935    }
1936    return u_terminateChars(script, scriptCapacity, i, err);
1937}
1938
1939U_CAPI int32_t  U_EXPORT2
1940uloc_getCountry(const char* localeID,
1941            char* country,
1942            int32_t countryCapacity,
1943            UErrorCode* err)
1944{
1945    int32_t i=0;
1946
1947    if(err==NULL || U_FAILURE(*err)) {
1948        return 0;
1949    }
1950
1951    if(localeID==NULL) {
1952        localeID=uloc_getDefault();
1953    }
1954
1955    /* Skip the language */
1956    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
1957    if(_isIDSeparator(*localeID)) {
1958        const char *scriptID;
1959        /* Skip the script if available */
1960        ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
1961        if(scriptID != localeID+1) {
1962            /* Found optional script */
1963            localeID = scriptID;
1964        }
1965        if(_isIDSeparator(*localeID)) {
1966            i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
1967        }
1968    }
1969    return u_terminateChars(country, countryCapacity, i, err);
1970}
1971
1972U_CAPI int32_t  U_EXPORT2
1973uloc_getVariant(const char* localeID,
1974                char* variant,
1975                int32_t variantCapacity,
1976                UErrorCode* err)
1977{
1978    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1979    const char* tmpLocaleID;
1980    int32_t i=0;
1981
1982    if(err==NULL || U_FAILURE(*err)) {
1983        return 0;
1984    }
1985
1986    if (_hasBCP47Extension(localeID)) {
1987        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1988    } else {
1989        if (localeID==NULL) {
1990           localeID=uloc_getDefault();
1991        }
1992        tmpLocaleID=localeID;
1993    }
1994
1995    /* Skip the language */
1996    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1997    if(_isIDSeparator(*tmpLocaleID)) {
1998        const char *scriptID;
1999        /* Skip the script if available */
2000        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2001        if(scriptID != tmpLocaleID+1) {
2002            /* Found optional script */
2003            tmpLocaleID = scriptID;
2004        }
2005        /* Skip the Country */
2006        if (_isIDSeparator(*tmpLocaleID)) {
2007            const char *cntryID;
2008            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2009            if (cntryID != tmpLocaleID+1) {
2010                /* Found optional country */
2011                tmpLocaleID = cntryID;
2012            }
2013            if(_isIDSeparator(*tmpLocaleID)) {
2014                /* If there was no country ID, skip a possible extra IDSeparator */
2015                if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2016                    tmpLocaleID++;
2017                }
2018                i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2019            }
2020        }
2021    }
2022
2023    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2024    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2025/*
2026    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2027        i=_getVariant(localeID+1, '@', variant, variantCapacity);
2028    }
2029*/
2030    return u_terminateChars(variant, variantCapacity, i, err);
2031}
2032
2033U_CAPI int32_t  U_EXPORT2
2034uloc_getName(const char* localeID,
2035             char* name,
2036             int32_t nameCapacity,
2037             UErrorCode* err)
2038{
2039    return _canonicalize(localeID, name, nameCapacity, 0, err);
2040}
2041
2042U_CAPI int32_t  U_EXPORT2
2043uloc_getBaseName(const char* localeID,
2044                 char* name,
2045                 int32_t nameCapacity,
2046                 UErrorCode* err)
2047{
2048    return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2049}
2050
2051U_CAPI int32_t  U_EXPORT2
2052uloc_canonicalize(const char* localeID,
2053                  char* name,
2054                  int32_t nameCapacity,
2055                  UErrorCode* err)
2056{
2057    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2058}
2059
2060U_CAPI const char*  U_EXPORT2
2061uloc_getISO3Language(const char* localeID)
2062{
2063    int16_t offset;
2064    char lang[ULOC_LANG_CAPACITY];
2065    UErrorCode err = U_ZERO_ERROR;
2066
2067    if (localeID == NULL)
2068    {
2069        localeID = uloc_getDefault();
2070    }
2071    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2072    if (U_FAILURE(err))
2073        return "";
2074    offset = _findIndex(LANGUAGES, lang);
2075    if (offset < 0)
2076        return "";
2077    return LANGUAGES_3[offset];
2078}
2079
2080U_CAPI const char*  U_EXPORT2
2081uloc_getISO3Country(const char* localeID)
2082{
2083    int16_t offset;
2084    char cntry[ULOC_LANG_CAPACITY];
2085    UErrorCode err = U_ZERO_ERROR;
2086
2087    if (localeID == NULL)
2088    {
2089        localeID = uloc_getDefault();
2090    }
2091    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2092    if (U_FAILURE(err))
2093        return "";
2094    offset = _findIndex(COUNTRIES, cntry);
2095    if (offset < 0)
2096        return "";
2097
2098    return COUNTRIES_3[offset];
2099}
2100
2101U_CAPI uint32_t  U_EXPORT2
2102uloc_getLCID(const char* localeID)
2103{
2104    UErrorCode status = U_ZERO_ERROR;
2105    char       langID[ULOC_FULLNAME_CAPACITY];
2106
2107    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2108    if (U_FAILURE(status)) {
2109        return 0;
2110    }
2111
2112    if (uprv_strchr(localeID, '@')) {
2113        // uprv_convertToLCID does not support keywords other than collation.
2114        // Remove all keywords except collation.
2115        int32_t len;
2116        char collVal[ULOC_KEYWORDS_CAPACITY];
2117        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2118
2119        len = uloc_getKeywordValue(localeID, "collation", collVal,
2120            sizeof(collVal)/sizeof(collVal[0]) - 1, &status);
2121
2122        if (U_SUCCESS(status) && len > 0) {
2123            collVal[len] = 0;
2124
2125            len = uloc_getBaseName(localeID, tmpLocaleID,
2126                sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - 1, &status);
2127
2128            if (U_SUCCESS(status)) {
2129                tmpLocaleID[len] = 0;
2130
2131                len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2132                    sizeof(tmpLocaleID)/sizeof(tmpLocaleID[0]) - len - 1, &status);
2133
2134                if (U_SUCCESS(status)) {
2135                    tmpLocaleID[len] = 0;
2136                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
2137                }
2138            }
2139        }
2140
2141        // fall through - all keywords are simply ignored
2142        status = U_ZERO_ERROR;
2143    }
2144
2145    return uprv_convertToLCID(langID, localeID, &status);
2146}
2147
2148U_CAPI int32_t U_EXPORT2
2149uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2150                UErrorCode *status)
2151{
2152    return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2153}
2154
2155/* ### Default locale **************************************************/
2156
2157U_CAPI const char*  U_EXPORT2
2158uloc_getDefault()
2159{
2160    return locale_get_default();
2161}
2162
2163U_CAPI void  U_EXPORT2
2164uloc_setDefault(const char*   newDefaultLocale,
2165             UErrorCode* err)
2166{
2167    if (U_FAILURE(*err))
2168        return;
2169    /* the error code isn't currently used for anything by this function*/
2170
2171    /* propagate change to C++ */
2172    locale_set_default(newDefaultLocale);
2173}
2174
2175/**
2176 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2177 * to an array of pointers to arrays of char.  All of these pointers are owned
2178 * by ICU-- do not delete them, and do not write through them.  The array is
2179 * terminated with a null pointer.
2180 */
2181U_CAPI const char* const*  U_EXPORT2
2182uloc_getISOLanguages()
2183{
2184    return LANGUAGES;
2185}
2186
2187/**
2188 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2189 * pointer to an array of pointers to arrays of char.  All of these pointers are
2190 * owned by ICU-- do not delete them, and do not write through them.  The array is
2191 * terminated with a null pointer.
2192 */
2193U_CAPI const char* const*  U_EXPORT2
2194uloc_getISOCountries()
2195{
2196    return COUNTRIES;
2197}
2198
2199
2200/* this function to be moved into cstring.c later */
2201static char gDecimal = 0;
2202
2203static /* U_CAPI */
2204double
2205/* U_EXPORT2 */
2206_uloc_strtod(const char *start, char **end) {
2207    char *decimal;
2208    char *myEnd;
2209    char buf[30];
2210    double rv;
2211    if (!gDecimal) {
2212        char rep[5];
2213        /* For machines that decide to change the decimal on you,
2214        and try to be too smart with localization.
2215        This normally should be just a '.'. */
2216        sprintf(rep, "%+1.1f", 1.0);
2217        gDecimal = rep[2];
2218    }
2219
2220    if(gDecimal == '.') {
2221        return uprv_strtod(start, end); /* fall through to OS */
2222    } else {
2223        uprv_strncpy(buf, start, 29);
2224        buf[29]=0;
2225        decimal = uprv_strchr(buf, '.');
2226        if(decimal) {
2227            *decimal = gDecimal;
2228        } else {
2229            return uprv_strtod(start, end); /* no decimal point */
2230        }
2231        rv = uprv_strtod(buf, &myEnd);
2232        if(end) {
2233            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2234        }
2235        return rv;
2236    }
2237}
2238
/* One parsed Accept-Language entry, as filled in by uloc_acceptLanguageFromHTTP
   and ordered by uloc_acceptLanguageCompare. */
typedef struct {
    float q;        /* value parsed from the item's ';' parameter, 1.0 when there is none */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID string for this entry */
} _acceptLangItem;
2244
2245static int32_t U_CALLCONV
2246uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2247{
2248    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2249    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2250
2251    int32_t rc = 0;
2252    if(bb->q < aa->q) {
2253        rc = -1;  /* A > B */
2254    } else if(bb->q > aa->q) {
2255        rc = 1;   /* A < B */
2256    } else {
2257        rc = 0;   /* A = B */
2258    }
2259
2260    if(rc==0) {
2261        rc = uprv_stricmp(aa->locale, bb->locale);
2262    }
2263
2264#if defined(ULOC_DEBUG)
2265    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2266    aa->locale, aa->q,
2267    bb->locale, bb->q,
2268    rc);*/
2269#endif
2270
2271    return rc;
2272}
2273
2274/*
2275mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2276*/
2277
2278U_CAPI int32_t U_EXPORT2
2279uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2280                            const char *httpAcceptLanguage,
2281                            UEnumeration* availableLocales,
2282                            UErrorCode *status)
2283{
2284    _acceptLangItem *j;
2285    _acceptLangItem smallBuffer[30];
2286    char **strs;
2287    char tmp[ULOC_FULLNAME_CAPACITY +1];
2288    int32_t n = 0;
2289    const char *itemEnd;
2290    const char *paramEnd;
2291    const char *s;
2292    const char *t;
2293    int32_t res;
2294    int32_t i;
2295    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2296    int32_t jSize;
2297    char *tempstr; /* Use for null pointer check */
2298
2299    j = smallBuffer;
2300    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2301    if(U_FAILURE(*status)) {
2302        return -1;
2303    }
2304
2305    for(s=httpAcceptLanguage;s&&*s;) {
2306        while(isspace(*s)) /* eat space at the beginning */
2307            s++;
2308        itemEnd=uprv_strchr(s,',');
2309        paramEnd=uprv_strchr(s,';');
2310        if(!itemEnd) {
2311            itemEnd = httpAcceptLanguage+l; /* end of string */
2312        }
2313        if(paramEnd && paramEnd<itemEnd) {
2314            /* semicolon (;) is closer than end (,) */
2315            t = paramEnd+1;
2316            if(*t=='q') {
2317                t++;
2318            }
2319            while(isspace(*t)) {
2320                t++;
2321            }
2322            if(*t=='=') {
2323                t++;
2324            }
2325            while(isspace(*t)) {
2326                t++;
2327            }
2328            j[n].q = (float)_uloc_strtod(t,NULL);
2329        } else {
2330            /* no semicolon - it's 1.0 */
2331            j[n].q = 1.0f;
2332            paramEnd = itemEnd;
2333        }
2334        j[n].dummy=0;
2335        /* eat spaces prior to semi */
2336        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2337            ;
2338        /* Check for null pointer from uprv_strndup */
2339        tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2340        if (tempstr == NULL) {
2341            *status = U_MEMORY_ALLOCATION_ERROR;
2342            return -1;
2343        }
2344        j[n].locale = tempstr;
2345        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2346        if(strcmp(j[n].locale,tmp)) {
2347            uprv_free(j[n].locale);
2348            j[n].locale=uprv_strdup(tmp);
2349        }
2350#if defined(ULOC_DEBUG)
2351        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2352#endif
2353        n++;
2354        s = itemEnd;
2355        while(*s==',') { /* eat duplicate commas */
2356            s++;
2357        }
2358        if(n>=jSize) {
2359            if(j==smallBuffer) {  /* overflowed the small buffer. */
2360                j = static_cast<_acceptLangItem *>(uprv_malloc(sizeof(j[0])*(jSize*2)));
2361                if(j!=NULL) {
2362                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2363                }
2364#if defined(ULOC_DEBUG)
2365                fprintf(stderr,"malloced at size %d\n", jSize);
2366#endif
2367            } else {
2368                j = static_cast<_acceptLangItem *>(uprv_realloc(j, sizeof(j[0])*jSize*2));
2369#if defined(ULOC_DEBUG)
2370                fprintf(stderr,"re-alloced at size %d\n", jSize);
2371#endif
2372            }
2373            jSize *= 2;
2374            if(j==NULL) {
2375                *status = U_MEMORY_ALLOCATION_ERROR;
2376                return -1;
2377            }
2378        }
2379    }
2380    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2381    if(U_FAILURE(*status)) {
2382        if(j != smallBuffer) {
2383#if defined(ULOC_DEBUG)
2384            fprintf(stderr,"freeing j %p\n", j);
2385#endif
2386            uprv_free(j);
2387        }
2388        return -1;
2389    }
2390    strs = static_cast<char **>(uprv_malloc((size_t)(sizeof(strs[0])*n)));
2391    /* Check for null pointer */
2392    if (strs == NULL) {
2393        uprv_free(j); /* Free to avoid memory leak */
2394        *status = U_MEMORY_ALLOCATION_ERROR;
2395        return -1;
2396    }
2397    for(i=0;i<n;i++) {
2398#if defined(ULOC_DEBUG)
2399        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2400#endif
2401        strs[i]=j[i].locale;
2402    }
2403    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2404        (const char**)strs, n, availableLocales, status);
2405    for(i=0;i<n;i++) {
2406        uprv_free(strs[i]);
2407    }
2408    uprv_free(strs);
2409    if(j != smallBuffer) {
2410#if defined(ULOC_DEBUG)
2411        fprintf(stderr,"freeing j %p\n", j);
2412#endif
2413        uprv_free(j);
2414    }
2415    return res;
2416}
2417
2418
2419U_CAPI int32_t U_EXPORT2
2420uloc_acceptLanguage(char *result, int32_t resultAvailable,
2421                    UAcceptResult *outResult, const char **acceptList,
2422                    int32_t acceptListCount,
2423                    UEnumeration* availableLocales,
2424                    UErrorCode *status)
2425{
2426    int32_t i,j;
2427    int32_t len;
2428    int32_t maxLen=0;
2429    char tmp[ULOC_FULLNAME_CAPACITY+1];
2430    const char *l;
2431    char **fallbackList;
2432    if(U_FAILURE(*status)) {
2433        return -1;
2434    }
2435    fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2436    if(fallbackList==NULL) {
2437        *status = U_MEMORY_ALLOCATION_ERROR;
2438        return -1;
2439    }
2440    for(i=0;i<acceptListCount;i++) {
2441#if defined(ULOC_DEBUG)
2442        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2443#endif
2444        while((l=uenum_next(availableLocales, NULL, status))) {
2445#if defined(ULOC_DEBUG)
2446            fprintf(stderr,"  %s\n", l);
2447#endif
2448            len = (int32_t)uprv_strlen(l);
2449            if(!uprv_strcmp(acceptList[i], l)) {
2450                if(outResult) {
2451                    *outResult = ULOC_ACCEPT_VALID;
2452                }
2453#if defined(ULOC_DEBUG)
2454                fprintf(stderr, "MATCH! %s\n", l);
2455#endif
2456                if(len>0) {
2457                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2458                }
2459                for(j=0;j<i;j++) {
2460                    uprv_free(fallbackList[j]);
2461                }
2462                uprv_free(fallbackList);
2463                return u_terminateChars(result, resultAvailable, len, status);
2464            }
2465            if(len>maxLen) {
2466                maxLen = len;
2467            }
2468        }
2469        uenum_reset(availableLocales, status);
2470        /* save off parent info */
2471        if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2472            fallbackList[i] = uprv_strdup(tmp);
2473        } else {
2474            fallbackList[i]=0;
2475        }
2476    }
2477
2478    for(maxLen--;maxLen>0;maxLen--) {
2479        for(i=0;i<acceptListCount;i++) {
2480            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2481#if defined(ULOC_DEBUG)
2482                fprintf(stderr,"Try: [%s]", fallbackList[i]);
2483#endif
2484                while((l=uenum_next(availableLocales, NULL, status))) {
2485#if defined(ULOC_DEBUG)
2486                    fprintf(stderr,"  %s\n", l);
2487#endif
2488                    len = (int32_t)uprv_strlen(l);
2489                    if(!uprv_strcmp(fallbackList[i], l)) {
2490                        if(outResult) {
2491                            *outResult = ULOC_ACCEPT_FALLBACK;
2492                        }
2493#if defined(ULOC_DEBUG)
2494                        fprintf(stderr, "fallback MATCH! %s\n", l);
2495#endif
2496                        if(len>0) {
2497                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2498                        }
2499                        for(j=0;j<acceptListCount;j++) {
2500                            uprv_free(fallbackList[j]);
2501                        }
2502                        uprv_free(fallbackList);
2503                        return u_terminateChars(result, resultAvailable, len, status);
2504                    }
2505                }
2506                uenum_reset(availableLocales, status);
2507
2508                if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2509                    uprv_free(fallbackList[i]);
2510                    fallbackList[i] = uprv_strdup(tmp);
2511                } else {
2512                    uprv_free(fallbackList[i]);
2513                    fallbackList[i]=0;
2514                }
2515            }
2516        }
2517        if(outResult) {
2518            *outResult = ULOC_ACCEPT_FAILED;
2519        }
2520    }
2521    for(i=0;i<acceptListCount;i++) {
2522        uprv_free(fallbackList[i]);
2523    }
2524    uprv_free(fallbackList);
2525    return -1;
2526}
2527
2528/*eof*/
2529