uloc.c revision b13da9df870a61b11249bf741347908dbea0edd8
1/*
2**********************************************************************
3*   Copyright (C) 1997-2007, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   04/01/97    aliu        Creation.
13*   08/21/98    stephen     JDK 1.2 sync
14*   12/08/98    rtg         New Locale implementation and C API
15*   03/15/99    damiba      overhaul.
16*   04/06/99    stephen     changed setDefault() to realloc and copy
17*   06/14/99    stephen     Changed calls to ures_open for new params
18*   07/21/99    stephen     Modified setDefault() to propagate to C++
19*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
20*                           brought canonicalization code into line with spec
21*****************************************************************************/
22
23/*
24   POSIX's locale format, from putil.c: [no spaces]
25
26     ll [ _CC ] [ . MM ] [ @ VV]
27
28     l = lang, C = ctry, M = charmap, V = variant
29*/
30
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34#include "unicode/ures.h"
35
36#include "putilimp.h"
37#include "ustr_imp.h"
38#include "ulocimp.h"
39#include "uresimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include "ucln_cmn.h"
44#include "locmap.h"
45#include "uarrsort.h"
46#include "uenumimp.h"
47#include "uassert.h"
48
49#include <stdio.h> /* for sprintf */
50
51/* ### Declarations **************************************************/
52
53/* Locale stuff from locid.cpp */
54U_CFUNC void locale_set_default(const char *id);
55U_CFUNC const char *locale_get_default(void);
56U_CFUNC int32_t
57locale_getKeywords(const char *localeID,
58            char prev,
59            char *keywords, int32_t keywordCapacity,
60            char *values, int32_t valuesCapacity, int32_t *valLen,
61            UBool valuesToo,
62            UErrorCode *status);
63
64/* ### Constants **************************************************/
65
/*
 * These strings describe the resources we attempt to load from
 * the locale ResourceBundle data file.
 */
static const char _kLanguages[]       = "Languages";
static const char _kScripts[]         = "Scripts";
static const char _kCountries[]       = "Countries";
static const char _kVariants[]        = "Variants";
static const char _kKeys[]            = "Keys";
static const char _kTypes[]           = "Types";
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";
static const char _kCurrency[]        = "currency";
static const char _kCurrencies[]      = "Currencies";

/*
 * Installed-locale array and its element count.  Both start out empty
 * (NULL / 0).  NOTE(review): the code that fills and releases them is
 * not in this part of the file.
 */
static char** _installedLocales = NULL;
static int32_t _installedLocalesCount = 0;
81
82/* ### Data tables **************************************************/
83
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched: LANGUAGES[i] and LANGUAGES_3[i] name the same
 * language.
 *
 * This table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The first list is visible to user
 * code when this array is returned by API.  The second list contains
 * codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn.  They are kept, but
 * in the second list at the end of the table, because their 3 character
 * codes duplicate entries of the first list.  This avoids bad searches
 * going from 3 to 2 character codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",
    "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",
    "ang", "anp", "apa",
    "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",
    "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",
    "bai", "bal", "ban", "bas", "bat", "be",  "bej",
    "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",
    "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",
    "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",
    "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",
    "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",
    "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",
    "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",
    "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",
    "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",
    "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil",
    "gl",  "gmh", "gn",  "goh", "gon", "gor", "got", "grb",
    "grc", "gsw", "gu",  "gv",  "gwi",
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him",
    "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",
    "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",
    "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",
    "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",
    "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",
    "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",
    "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",
    "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",
    "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",
    "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",
    "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",
    "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",
    "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",
    "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",
    "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",
    "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",
    "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",
    "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",
    "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",
    "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",
    "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",
    "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",
    "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",
    "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",
    "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",
    "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",
    "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",
    "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",
    "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",
    "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",
    "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",
    "zu",  "zun", "zxx", "zza",
    NULL,   /* end of the list exposed through API */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
    NULL    /* end of the table */
};
/*
 * Withdrawn language codes and the codes that replace them.  The two
 * arrays are index-parallel: REPLACEMENT_LANGUAGES[i] is the current code
 * for DEPRECATED_LANGUAGES[i].  Both end with two NULL entries, the same
 * double-NULL shape the other code tables in this file use.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
/*  "in", "iw", "ji", "jw" */
    "id", "he", "yi", "jv", NULL, NULL
};
185
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * Every entry in this table is exactly three letters long.
 *
 * This table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The two lists correspond to the two
 * lists in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr",     */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
    NULL,   /* end of the list that pairs with the API-visible LANGUAGES list */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
    NULL    /* end of the table */
};
322
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched: COUNTRIES[i] and COUNTRIES_3[i] name the same
 * country.
 *
 * This table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The first list is visible to user
 * code when this array is returned by API.  The second list contains
 * codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 * "RO" therefore appears in both lists: paired with "ROU" in the first
 * and with the old "ROM" in the second.
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",
    "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    NULL,   /* end of the list exposed through API */
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   /* obsolete country codes */
    NULL    /* end of the table */
};
382
/*
 * Deprecated country codes and the codes that replace them.  The two
 * arrays are index-parallel: REPLACEMENT_COUNTRIES[i] is the current code
 * for DEPRECATED_COUNTRIES[i].  Both end with two NULL entries, the same
 * double-NULL shape the other code tables in this file use.
 */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */
    "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL  /* replacement country codes */
};
390
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table is terminated with a NULL entry, followed by a second
 * list, and another NULL entry.  The two lists correspond to the two
 * lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",     */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV",
/*  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL,   /* end of the list that pairs with the API-visible COUNTRIES list */
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",   */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    NULL    /* end of the table */
};
470
/**
 * One row of the locale-ID canonicalization table: an input ID, the ID
 * it canonicalizes to, and an optional keyword/value pair that goes
 * with the canonical ID.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} CanonicalizationMap;
477
478/**
479 * A map to canonicalize locale IDs.  This handles a variety of
480 * different semantic kinds of transformations.
481 */
482static const CanonicalizationMap CANONICALIZE_MAP[] = {
483    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
484    { "C",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
485    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
486    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
487    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
488    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
489    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
490    { "cel_GAULISH",    "cel__GAULISH", NULL, NULL }, /* registered name */
491    { "de_1901",        "de__1901", NULL, NULL }, /* registered name */
492    { "de_1906",        "de__1906", NULL, NULL }, /* registered name */
493    { "de__PHONEBOOK",  "de", "collation", "phonebook" },
494    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
495    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
496    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
497    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
498    { "en_BOONT",       "en__BOONT", NULL, NULL }, /* registered name */
499    { "en_SCOUSE",      "en__SCOUSE", NULL, NULL }, /* registered name */
500    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
501    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
502    { "es__TRADITIONAL", "es", "collation", "traditional" },
503    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
504    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
505    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
506    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
507    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
508    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
509    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
510    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
511    { "hi__DIRECT",     "hi", "collation", "direct" },
512    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
513    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
514    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
515    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
516    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
517    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
518    { "sl_ROZAJ",       "sl__ROZAJ", NULL, NULL }, /* registered name */
519    { "sr_SP_CYRL",     "sr_Cyrl_CS", NULL, NULL }, /* .NET name */
520    { "sr_SP_LATN",     "sr_Latn_CS", NULL, NULL }, /* .NET name */
521    { "sr_YU_CYRILLIC", "sr_Cyrl_CS", NULL, NULL }, /* Linux name */
522    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
523    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
524    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
525    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
526    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name TODO: This should be zh_Hant once the locale structure is fixed. */
527    { "zh_GAN",         "zh__GAN", NULL, NULL }, /* registered name */
528    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
529    { "zh_HAKKA",       "zh__HAKKA", NULL, NULL }, /* registered name */
530    { "zh_MIN",         "zh__MIN", NULL, NULL }, /* registered name */
531    { "zh_MIN_NAN",     "zh__MINNAN", NULL, NULL }, /* registered name */
532    { "zh_WUU",         "zh__WUU", NULL, NULL }, /* registered name */
533    { "zh_XIANG",       "zh__XIANG", NULL, NULL }, /* registered name */
534    { "zh_YUE",         "zh__YUE", NULL, NULL }, /* registered name */
535    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
536    { "hi_IN_TRADITIONAL", "hi_IN", "calendar", "indian" },
537    { "zh_TW_STROKE",   "zh_Hant_TW", "collation", "stroke" },
538    { "zh__PINYIN",     "zh", "collation", "pinyin" }
539};
540
541/* ### Keywords **************************************************/
542
/* Size, in chars including the terminating zero, of the buffers that hold
   a canonicalized keyword name (see KeywordStruct.keyword and
   locale_canonKeywordName, which rejects names that do not fit). */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords _getKeywords can collect from one locale ID
   (the capacity of its local keyword list). */
#define ULOC_MAX_NO_KEYWORDS 25
545
/**
 * Find where the keyword section of a locale ID begins.
 *
 * @param localeID zero-terminated locale ID to search
 * @return pointer to the '@' found in localeID by uprv_strchr (the
 *         pointer addresses the '@' itself, not the character after it),
 *         or NULL if the ID contains no keyword marker.  On EBCDIC builds,
 *         when the '@' of the compile-time code page is absent, each known
 *         EBCDIC encoding of '@' is tried in table order and the first
 *         match is returned.
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *result = uprv_strchr(localeID, '@');
    if(result != NULL) {
        return result;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *charToFind = ebcdicSigns;
        /* the table is terminated by its 0x00 entry */
        while(*charToFind) {
            if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
                return result;
            }
            charToFind++;
        }
    }
#endif
    return NULL;
}
569
570/**
571 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
572 * @param keywordName incoming name to be canonicalized
573 * @param status return status (keyword too long)
574 * @return length of the keyword name
575 */
576static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
577{
578  int32_t i;
579  int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
580
581  if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
582    /* keyword name too long for internal buffer */
583    *status = U_INTERNAL_PROGRAM_ERROR;
584          return 0;
585  }
586
587  /* normalize the keyword name */
588  for(i = 0; i < keywordNameLen; i++) {
589    buf[i] = uprv_tolower(keywordName[i]);
590  }
591  buf[i] = 0;
592
593  return keywordNameLen;
594}
595
596typedef struct {
597    char keyword[ULOC_KEYWORD_BUFFER_LEN];
598    int32_t keywordLen;
599    const char *valueStart;
600    int32_t valueLen;
601} KeywordStruct;
602
603static int32_t U_CALLCONV
604compareKeywordStructs(const void *context, const void *left, const void *right) {
605    const char* leftString = ((const KeywordStruct *)left)->keyword;
606    const char* rightString = ((const KeywordStruct *)right)->keyword;
607    return uprv_strcmp(leftString, rightString);
608}
609
610/**
611 * Both addKeyword and addValue must already be in canonical form.
612 * Either both addKeyword and addValue are NULL, or neither is NULL.
613 * If they are not NULL they must be zero terminated.
614 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
615 */
616static int32_t
617_getKeywords(const char *localeID,
618             char prev,
619             char *keywords, int32_t keywordCapacity,
620             char *values, int32_t valuesCapacity, int32_t *valLen,
621             UBool valuesToo,
622             const char* addKeyword,
623             const char* addValue,
624             UErrorCode *status)
625{
626    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
627
628    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
629    int32_t numKeywords = 0;
630    const char* pos = localeID;
631    const char* equalSign = NULL;
632    const char* semicolon = NULL;
633    int32_t i = 0, j, n;
634    int32_t keywordsLen = 0;
635    int32_t valuesLen = 0;
636
637    if(prev == '@') { /* start of keyword definition */
638        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
639        do {
640            UBool duplicate = FALSE;
641            /* skip leading spaces */
642            while(*pos == ' ') {
643                pos++;
644            }
645            if (!*pos) { /* handle trailing "; " */
646                break;
647            }
648            if(numKeywords == maxKeywords) {
649                *status = U_INTERNAL_PROGRAM_ERROR;
650                return 0;
651            }
652            equalSign = uprv_strchr(pos, '=');
653            semicolon = uprv_strchr(pos, ';');
654            /* lack of '=' [foo@currency] is illegal */
655            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
656            if(!equalSign || (semicolon && semicolon<equalSign)) {
657                *status = U_INVALID_FORMAT_ERROR;
658                return 0;
659            }
660            /* need to normalize both keyword and keyword name */
661            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
662                /* keyword name too long for internal buffer */
663                *status = U_INTERNAL_PROGRAM_ERROR;
664                return 0;
665            }
666            for(i = 0, n = 0; i < equalSign - pos; ++i) {
667                if (pos[i] != ' ') {
668                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
669                }
670            }
671            keywordList[numKeywords].keyword[n] = 0;
672            keywordList[numKeywords].keywordLen = n;
673            /* now grab the value part. First we skip the '=' */
674            equalSign++;
675            /* then we leading spaces */
676            while(*equalSign == ' ') {
677                equalSign++;
678            }
679            keywordList[numKeywords].valueStart = equalSign;
680
681            pos = semicolon;
682            i = 0;
683            if(pos) {
684                while(*(pos - i - 1) == ' ') {
685                    i++;
686                }
687                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
688                pos++;
689            } else {
690                i = (int32_t)uprv_strlen(equalSign);
691                while(equalSign[i-1] == ' ') {
692                    i--;
693                }
694                keywordList[numKeywords].valueLen = i;
695            }
696            /* If this is a duplicate keyword, then ignore it */
697            for (j=0; j<numKeywords; ++j) {
698                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
699                    duplicate = TRUE;
700                    break;
701                }
702            }
703            if (!duplicate) {
704                ++numKeywords;
705            }
706        } while(pos);
707
708        /* Handle addKeyword/addValue. */
709        if (addKeyword != NULL) {
710            UBool duplicate = FALSE;
711            U_ASSERT(addValue != NULL);
712            /* Search for duplicate; if found, do nothing. Explicit keyword
713               overrides addKeyword. */
714            for (j=0; j<numKeywords; ++j) {
715                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
716                    duplicate = TRUE;
717                    break;
718                }
719            }
720            if (!duplicate) {
721                if (numKeywords == maxKeywords) {
722                    *status = U_INTERNAL_PROGRAM_ERROR;
723                    return 0;
724                }
725                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
726                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
727                keywordList[numKeywords].valueStart = addValue;
728                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
729                ++numKeywords;
730            }
731        } else {
732            U_ASSERT(addValue == NULL);
733        }
734
735        /* now we have a list of keywords */
736        /* we need to sort it */
737        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
738
739        /* Now construct the keyword part */
740        for(i = 0; i < numKeywords; i++) {
741            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
742                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
743                if(valuesToo) {
744                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
745                } else {
746                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
747                }
748            }
749            keywordsLen += keywordList[i].keywordLen + 1;
750            if(valuesToo) {
751                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
752                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
753                }
754                keywordsLen += keywordList[i].valueLen;
755
756                if(i < numKeywords - 1) {
757                    if(keywordsLen < keywordCapacity) {
758                        keywords[keywordsLen] = ';';
759                    }
760                    keywordsLen++;
761                }
762            }
763            if(values) {
764                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
765                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
766                    values[valuesLen + keywordList[i].valueLen] = 0;
767                }
768                valuesLen += keywordList[i].valueLen + 1;
769            }
770        }
771        if(values) {
772            values[valuesLen] = 0;
773            if(valLen) {
774                *valLen = valuesLen;
775            }
776        }
777        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
778    } else {
779        return 0;
780    }
781}
782
783U_CFUNC int32_t
784locale_getKeywords(const char *localeID,
785                   char prev,
786                   char *keywords, int32_t keywordCapacity,
787                   char *values, int32_t valuesCapacity, int32_t *valLen,
788                   UBool valuesToo,
789                   UErrorCode *status) {
790    return _getKeywords(localeID, prev, keywords, keywordCapacity,
791                        values, valuesCapacity, valLen, valuesToo,
792                        NULL, NULL, status);
793}
794
795U_CAPI int32_t U_EXPORT2
796uloc_getKeywordValue(const char* localeID,
797                     const char* keywordName,
798                     char* buffer, int32_t bufferCapacity,
799                     UErrorCode* status)
800{
801    const char* nextSeparator = NULL;
802    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
803    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
804    int32_t i = 0;
805    int32_t result = 0;
806
807    if(status && U_SUCCESS(*status) && localeID) {
808
809      const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
810      if(startSearchHere == NULL) {
811          /* no keywords, return at once */
812          return 0;
813      }
814
815      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
816      if(U_FAILURE(*status)) {
817        return 0;
818      }
819
820      /* find the first keyword */
821      while(startSearchHere) {
822          startSearchHere++;
823          /* skip leading spaces (allowed?) */
824          while(*startSearchHere == ' ') {
825              startSearchHere++;
826          }
827          nextSeparator = uprv_strchr(startSearchHere, '=');
828          /* need to normalize both keyword and keyword name */
829          if(!nextSeparator) {
830              break;
831          }
832          if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
833              /* keyword name too long for internal buffer */
834              *status = U_INTERNAL_PROGRAM_ERROR;
835              return 0;
836          }
837          for(i = 0; i < nextSeparator - startSearchHere; i++) {
838              localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
839          }
840          /* trim trailing spaces */
841          while(startSearchHere[i-1] == ' ') {
842              i--;
843          }
844          localeKeywordNameBuffer[i] = 0;
845
846          startSearchHere = uprv_strchr(nextSeparator, ';');
847
848          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
849              nextSeparator++;
850              while(*nextSeparator == ' ') {
851                  nextSeparator++;
852              }
853              /* we actually found the keyword. Copy the value */
854              if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
855                  while(*(startSearchHere-1) == ' ') {
856                      startSearchHere--;
857                  }
858                  uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
859                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
860              } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
861                  i = (int32_t)uprv_strlen(nextSeparator);
862                  while(nextSeparator[i - 1] == ' ') {
863                      i--;
864                  }
865                  uprv_strncpy(buffer, nextSeparator, i);
866                  result = u_terminateChars(buffer, bufferCapacity, i, status);
867              } else {
868                  /* give a bigger buffer, please */
869                  *status = U_BUFFER_OVERFLOW_ERROR;
870                  if(startSearchHere) {
871                      result = (int32_t)(startSearchHere - nextSeparator);
872                  } else {
873                      result = (int32_t)uprv_strlen(nextSeparator);
874                  }
875              }
876              return result;
877          }
878      }
879    }
880    return 0;
881}
882
883U_CAPI int32_t U_EXPORT2
884uloc_setKeywordValue(const char* keywordName,
885                     const char* keywordValue,
886                     char* buffer, int32_t bufferCapacity,
887                     UErrorCode* status)
888{
889    /* TODO: sorting. removal. */
890    int32_t keywordNameLen;
891    int32_t keywordValueLen;
892    int32_t bufLen;
893    int32_t needLen = 0;
894    int32_t foundValueLen;
895    int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
896    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
897    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
898    int32_t i = 0;
899    int32_t rc;
900    char* nextSeparator = NULL;
901    char* nextEqualsign = NULL;
902    char* startSearchHere = NULL;
903    char* keywordStart = NULL;
904    char *insertHere = NULL;
905    if(U_FAILURE(*status)) {
906        return -1;
907    }
908    if(bufferCapacity>1) {
909        bufLen = (int32_t)uprv_strlen(buffer);
910    } else {
911        *status = U_ILLEGAL_ARGUMENT_ERROR;
912        return 0;
913    }
914    if(bufferCapacity<bufLen) {
915        /* The capacity is less than the length?! Is this NULL terminated? */
916        *status = U_ILLEGAL_ARGUMENT_ERROR;
917        return 0;
918    }
919    if(keywordValue && !*keywordValue) {
920        keywordValue = NULL;
921    }
922    if(keywordValue) {
923        keywordValueLen = (int32_t)uprv_strlen(keywordValue);
924    } else {
925        keywordValueLen = 0;
926    }
927    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
928    if(U_FAILURE(*status)) {
929        return 0;
930    }
931    startSearchHere = (char*)locale_getKeywordsStart(buffer);
932    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
933        if(!keywordValue) { /* no keywords = nothing to remove */
934            return bufLen;
935        }
936
937        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
938        if(startSearchHere) { /* had a single @ */
939            needLen--; /* already had the @ */
940            /* startSearchHere points at the @ */
941        } else {
942            startSearchHere=buffer+bufLen;
943        }
944        if(needLen >= bufferCapacity) {
945            *status = U_BUFFER_OVERFLOW_ERROR;
946            return needLen; /* no change */
947        }
948        *startSearchHere = '@';
949        startSearchHere++;
950        uprv_strcpy(startSearchHere, keywordNameBuffer);
951        startSearchHere += keywordNameLen;
952        *startSearchHere = '=';
953        startSearchHere++;
954        uprv_strcpy(startSearchHere, keywordValue);
955        startSearchHere+=keywordValueLen;
956        return needLen;
957    } /* end shortcut - no @ */
958
959    keywordStart = startSearchHere;
960    /* search for keyword */
961    while(keywordStart) {
962        keywordStart++;
963        /* skip leading spaces (allowed?) */
964        while(*keywordStart == ' ') {
965            keywordStart++;
966        }
967        nextEqualsign = uprv_strchr(keywordStart, '=');
968        /* need to normalize both keyword and keyword name */
969        if(!nextEqualsign) {
970            break;
971        }
972        if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
973            /* keyword name too long for internal buffer */
974            *status = U_INTERNAL_PROGRAM_ERROR;
975            return 0;
976        }
977        for(i = 0; i < nextEqualsign - keywordStart; i++) {
978            localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
979        }
980        /* trim trailing spaces */
981        while(keywordStart[i-1] == ' ') {
982            i--;
983        }
984        localeKeywordNameBuffer[i] = 0;
985
986        nextSeparator = uprv_strchr(nextEqualsign, ';');
987        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
988        if(rc == 0) {
989            nextEqualsign++;
990            while(*nextEqualsign == ' ') {
991                nextEqualsign++;
992            }
993            /* we actually found the keyword. Change the value */
994            if (nextSeparator) {
995                keywordAtEnd = 0;
996                foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
997            } else {
998                keywordAtEnd = 1;
999                foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1000            }
1001            if(keywordValue) { /* adding a value - not removing */
1002              if(foundValueLen == keywordValueLen) {
1003                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1004                return bufLen; /* no change in size */
1005              } else if(foundValueLen > keywordValueLen) {
1006                int32_t delta = foundValueLen - keywordValueLen;
1007                if(nextSeparator) { /* RH side */
1008                  uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1009                }
1010                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1011                bufLen -= delta;
1012                buffer[bufLen]=0;
1013                return bufLen;
1014              } else { /* FVL < KVL */
1015                int32_t delta = keywordValueLen - foundValueLen;
1016                if((bufLen+delta) >= bufferCapacity) {
1017                  *status = U_BUFFER_OVERFLOW_ERROR;
1018                  return bufLen+delta;
1019                }
1020                if(nextSeparator) { /* RH side */
1021                  uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1022                }
1023                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1024                bufLen += delta;
1025                buffer[bufLen]=0;
1026                return bufLen;
1027              }
1028            } else { /* removing a keyword */
1029              if(keywordAtEnd) {
1030                /* zero out the ';' or '@' just before startSearchhere */
1031                keywordStart[-1] = 0;
1032                return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1033              } else {
1034                uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1035                keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1036                return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1037              }
1038            }
1039        } else if(rc<0){ /* end match keyword */
1040          /* could insert at this location. */
1041          insertHere = keywordStart;
1042        }
1043        keywordStart = nextSeparator;
1044    } /* end loop searching */
1045
1046    if(!keywordValue) {
1047      return bufLen; /* removal of non-extant keyword - no change */
1048    }
1049
1050    /* we know there is at least one keyword. */
1051    needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1052    if(needLen >= bufferCapacity) {
1053        *status = U_BUFFER_OVERFLOW_ERROR;
1054        return needLen; /* no change */
1055    }
1056
1057    if(insertHere) {
1058      uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1059      keywordStart = insertHere;
1060    } else {
1061      keywordStart = buffer+bufLen;
1062      *keywordStart = ';';
1063      keywordStart++;
1064    }
1065    uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1066    keywordStart += keywordNameLen;
1067    *keywordStart = '=';
1068    keywordStart++;
1069    uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1070    keywordStart+=keywordValueLen;
1071    if(insertHere) {
1072      *keywordStart = ';';
1073      keywordStart++;
1074    }
1075    buffer[needLen]=0;
1076    return needLen;
1077}
1078
1079/* ### ID parsing implementation **************************************************/
1080
/* NOTE: the macros below evaluate their argument more than once; do not pass
   expressions with side effects. Arguments are parenthesized so that compound
   expressions such as (p + 1) or (c = *s) expand correctly. */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/*returns TRUE if a is a letter that can start a special prefix*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1094
/**
 * Bounded character search: look for c within the first len characters of
 * str, giving up early at a zero byte.
 *
 * @return pointer to the first occurrence of c, or NULL if it is not found
 */
static char* _strnchr(const char* str, int32_t len, char c) {
    const char *cursor = str;
    int32_t remaining;

    U_ASSERT(str != 0 && len >= 0);
    for (remaining = len; remaining != 0; --remaining, ++cursor) {
        if (*cursor == c) {
            return (char*) cursor;
        }
        if (*cursor == 0) {
            /* end of string reached before the limit */
            break;
        }
    }
    return NULL;
}
1108
/**
 * Lookup 'key' in the array 'list'.  The array 'list' is scanned as two
 * consecutive NULL-terminated sections: entries, a NULL, more entries, and
 * a second NULL.
 *
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
 * COUNTRIES_3.
 *
 * @return index of the matching entry counted from the start of 'list'
 *         (the NULL between the sections occupies an index too), or -1
 */
static int16_t _findIndex(const char* const* list, const char* key)
{
    const char* const* entry = list;
    int32_t section;

    for (section = 0; section < 2; ++section) {
        for (; *entry != NULL; ++entry) {
            if (uprv_strcmp(key, *entry) == 0) {
                return (int16_t)(entry - list);
            }
        }
        ++entry;     /* step over the NULL that ends this section */
    }
    return -1;
}
1133
1134/* count the length of src while copying it to dest; return strlen(src) */
1135static U_INLINE int32_t
1136_copyCount(char *dest, int32_t destCapacity, const char *src) {
1137    const char *anchor;
1138    char c;
1139
1140    anchor=src;
1141    for(;;) {
1142        if((c=*src)==0) {
1143            return (int32_t)(src-anchor);
1144        }
1145        if(destCapacity<=0) {
1146            return (int32_t)((src-anchor)+uprv_strlen(src));
1147        }
1148        ++src;
1149        *dest++=c;
1150        --destCapacity;
1151    }
1152}
1153
1154static const char*
1155uloc_getCurrentCountryID(const char* oldID){
1156    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1157    if (offset >= 0) {
1158        return REPLACEMENT_COUNTRIES[offset];
1159    }
1160    return oldID;
1161}
1162static const char*
1163uloc_getCurrentLanguageID(const char* oldID){
1164    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1165    if (offset >= 0) {
1166        return REPLACEMENT_LANGUAGES[offset];
1167    }
1168    return oldID;
1169}
1170/*
1171 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1172 * avoid duplicating code to handle the earlier locale ID pieces
1173 * in the functions for the later ones by
1174 * setting the *pEnd pointer to where they stopped parsing
1175 *
1176 * TODO try to use this in Locale
1177 */
1178static int32_t
1179_getLanguage(const char *localeID,
1180             char *language, int32_t languageCapacity,
1181             const char **pEnd) {
1182    int32_t i=0;
1183    int32_t offset;
1184    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1185
1186    /* if it starts with i- or x- then copy that prefix */
1187    if(_isIDPrefix(localeID)) {
1188        if(i<languageCapacity) {
1189            language[i]=(char)uprv_tolower(*localeID);
1190        }
1191        if(i<languageCapacity) {
1192            language[i+1]='-';
1193        }
1194        i+=2;
1195        localeID+=2;
1196    }
1197
1198    /* copy the language as far as possible and count its length */
1199    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1200        if(i<languageCapacity) {
1201            language[i]=(char)uprv_tolower(*localeID);
1202        }
1203        if(i<3) {
1204            lang[i]=(char)uprv_tolower(*localeID);
1205        }
1206        i++;
1207        localeID++;
1208    }
1209
1210    if(i==3) {
1211        /* convert 3 character code to 2 character code if possible *CWB*/
1212        offset=_findIndex(LANGUAGES_3, lang);
1213        if(offset>=0) {
1214            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1215        }
1216    }
1217
1218    if(pEnd!=NULL) {
1219        *pEnd=localeID;
1220    }
1221    return i;
1222}
1223
/**
 * Extract an optional script subtag from the start of localeID.
 *
 * The next subtag counts as a script only if it is exactly 4 characters
 * long; it is then written with an uppercase first letter and lowercase
 * remaining letters.
 *
 * @param localeID       text to parse, positioned at the candidate subtag
 * @param script         output buffer
 * @param scriptCapacity capacity of script
 * @param pEnd           if not NULL, receives the position after the script,
 *                       or localeID itself when no script is present
 * @return number of characters written (4 limited to scriptCapacity), or 0
 *         if there is no script
 */
static int32_t
_getScript(const char *localeID,
            char *script, int32_t scriptCapacity,
            const char **pEnd)
{
    int32_t subtagLen = 0;
    int32_t written;
    int32_t k;

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the subtag that starts here */
    while(!_isTerminator(localeID[subtagLen]) && !_isIDSeparator(localeID[subtagLen])) {
        subtagLen++;
    }

    /* Anything that is not exactly 4 characters long is not a script. */
    if (subtagLen != 4) {
        return 0;
    }

    if (pEnd != NULL) {
        *pEnd = localeID+subtagLen;
    }
    written = (subtagLen > scriptCapacity) ? scriptCapacity : subtagLen;
    for (k = 0; k < written; k++) {
        if (k == 0) {
            script[k]=(char)uprv_toupper(localeID[k]);
        } else {
            script[k]=(char)uprv_tolower(localeID[k]);
        }
    }
    return written;
}
1261
1262static int32_t
1263_getCountry(const char *localeID,
1264            char *country, int32_t countryCapacity,
1265            const char **pEnd)
1266{
1267    int32_t i=0;
1268    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1269    int32_t offset;
1270
1271    /* copy the country as far as possible and count its length */
1272    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1273        if(i<countryCapacity) {
1274            country[i]=(char)uprv_toupper(*localeID);
1275        }
1276        if(i<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1277            cnty[i]=(char)uprv_toupper(*localeID);
1278        }
1279        i++;
1280        localeID++;
1281    }
1282
1283    /* convert 3 character code to 2 character code if possible *CWB*/
1284    if(i==3) {
1285        offset=_findIndex(COUNTRIES_3, cnty);
1286        if(offset>=0) {
1287            i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1288        }
1289    }
1290
1291    if(pEnd!=NULL) {
1292        *pEnd=localeID;
1293    }
1294    return i;
1295}
1296
1297/**
1298 * @param needSeparator if true, then add leading '_' if any variants
1299 * are added to 'variant'
1300 */
1301static int32_t
1302_getVariantEx(const char *localeID,
1303              char prev,
1304              char *variant, int32_t variantCapacity,
1305              UBool needSeparator) {
1306    int32_t i=0;
1307
1308    /* get one or more variant tags and separate them with '_' */
1309    if(_isIDSeparator(prev)) {
1310        /* get a variant string after a '-' or '_' */
1311        while(!_isTerminator(*localeID)) {
1312            if (needSeparator) {
1313                if (i<variantCapacity) {
1314                    variant[i] = '_';
1315                }
1316                ++i;
1317                needSeparator = FALSE;
1318            }
1319            if(i<variantCapacity) {
1320                variant[i]=(char)uprv_toupper(*localeID);
1321                if(variant[i]=='-') {
1322                    variant[i]='_';
1323                }
1324            }
1325            i++;
1326            localeID++;
1327        }
1328    }
1329
1330    /* if there is no variant tag after a '-' or '_' then look for '@' */
1331    if(i==0) {
1332        if(prev=='@') {
1333            /* keep localeID */
1334        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1335            ++localeID; /* point after the '@' */
1336        } else {
1337            return 0;
1338        }
1339        while(!_isTerminator(*localeID)) {
1340            if (needSeparator) {
1341                if (i<variantCapacity) {
1342                    variant[i] = '_';
1343                }
1344                ++i;
1345                needSeparator = FALSE;
1346            }
1347            if(i<variantCapacity) {
1348                variant[i]=(char)uprv_toupper(*localeID);
1349                if(variant[i]=='-' || variant[i]==',') {
1350                    variant[i]='_';
1351                }
1352            }
1353            i++;
1354            localeID++;
1355        }
1356    }
1357
1358    return i;
1359}
1360
1361static int32_t
1362_getVariant(const char *localeID,
1363            char prev,
1364            char *variant, int32_t variantCapacity) {
1365    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1366}
1367
1368/**
1369 * Delete ALL instances of a variant from the given list of one or
1370 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1371 * @param variants the source string of one or more variants,
1372 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1373 * terminated; if it is, trailing zero will NOT be maintained.
1374 * @param variantsLen length of variants
1375 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1376 * or "PREEURO"; not zero terminated
1377 * @param toDeleteLen length of toDelete
1378 * @return number of characters deleted from variants
1379 */
1380static int32_t
1381_deleteVariant(char* variants, int32_t variantsLen,
1382               const char* toDelete, int32_t toDeleteLen) {
1383    int32_t delta = 0; /* number of chars deleted */
1384    for (;;) {
1385        UBool flag = FALSE;
1386        if (variantsLen < toDeleteLen) {
1387            return delta;
1388        }
1389        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1390            (variantsLen == toDeleteLen ||
1391             (flag=(variants[toDeleteLen] == '_')))) {
1392            int32_t d = toDeleteLen + (flag?1:0);
1393            variantsLen -= d;
1394            delta += d;
1395            if (variantsLen > 0) {
1396                uprv_memmove(variants, variants+d, variantsLen);
1397            }
1398        } else {
1399            char* p = _strnchr(variants, variantsLen, '_');
1400            if (p == NULL) {
1401                return delta;
1402            }
1403            ++p;
1404            variantsLen -= (int32_t)(p - variants);
1405            variants = p;
1406        }
1407    }
1408}
1409
/* Keyword enumeration */

/* Per-enumeration state for the keyword list enumerator. */
typedef struct UKeywordsContext {
    char* keywords; /* heap copy of the keyword list: zero-terminated names back to back, closed by an empty string; freed on close */
    char* current;  /* position inside keywords of the next name to return */
} UKeywordsContext;
1416
1417static void U_CALLCONV
1418uloc_kw_closeKeywords(UEnumeration *enumerator) {
1419    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1420    uprv_free(enumerator->context);
1421    uprv_free(enumerator);
1422}
1423
1424static int32_t U_CALLCONV
1425uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1426    char *kw = ((UKeywordsContext *)en->context)->keywords;
1427    int32_t result = 0;
1428    while(*kw) {
1429        result++;
1430        kw += uprv_strlen(kw)+1;
1431    }
1432    return result;
1433}
1434
1435static const char* U_CALLCONV
1436uloc_kw_nextKeyword(UEnumeration* en,
1437                    int32_t* resultLength,
1438                    UErrorCode* status) {
1439    const char* result = ((UKeywordsContext *)en->context)->current;
1440    int32_t len = 0;
1441    if(*result) {
1442        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1443        ((UKeywordsContext *)en->context)->current += len+1;
1444    } else {
1445        result = NULL;
1446    }
1447    if (resultLength) {
1448        *resultLength = len;
1449    }
1450    return result;
1451}
1452
1453static void U_CALLCONV
1454uloc_kw_resetKeywords(UEnumeration* en,
1455                      UErrorCode* status) {
1456    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1457}
1458
/* Template that uloc_openKeywordList() copies into every keyword enumeration
   it creates; the per-instance context pointer is filled in afterwards.
   NOTE(review): the initializer is positional, so the meaning of each slot
   depends on the member order of UEnumeration (declared elsewhere) - the two
   leading NULLs are presumably the context pointers; confirm against that
   declaration before reordering. */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1468
1469U_CAPI UEnumeration* U_EXPORT2
1470uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1471{
1472  UKeywordsContext *myContext = NULL;
1473  UEnumeration *result = NULL;
1474
1475  if(U_FAILURE(*status)) {
1476    return NULL;
1477  }
1478  result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1479  uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1480  myContext = uprv_malloc(sizeof(UKeywordsContext));
1481  if (myContext == NULL) {
1482    *status = U_MEMORY_ALLOCATION_ERROR;
1483    uprv_free(result);
1484    return NULL;
1485  }
1486  myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1487  uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1488  myContext->keywords[keywordListSize] = 0;
1489  myContext->current = myContext->keywords;
1490  result->context = myContext;
1491  return result;
1492}
1493
1494U_CAPI UEnumeration* U_EXPORT2
1495uloc_openKeywords(const char* localeID,
1496                        UErrorCode* status)
1497{
1498    int32_t i=0;
1499    char keywords[256];
1500    int32_t keywordsCapacity = 256;
1501    if(status==NULL || U_FAILURE(*status)) {
1502        return 0;
1503    }
1504
1505    if(localeID==NULL) {
1506        localeID=uloc_getDefault();
1507    }
1508
1509    /* Skip the language */
1510    _getLanguage(localeID, NULL, 0, &localeID);
1511    if(_isIDSeparator(*localeID)) {
1512        const char *scriptID;
1513        /* Skip the script if available */
1514        _getScript(localeID+1, NULL, 0, &scriptID);
1515        if(scriptID != localeID+1) {
1516            /* Found optional script */
1517            localeID = scriptID;
1518        }
1519        /* Skip the Country */
1520        if (_isIDSeparator(*localeID)) {
1521            _getCountry(localeID+1, NULL, 0, &localeID);
1522            if(_isIDSeparator(*localeID)) {
1523                _getVariant(localeID+1, *localeID, NULL, 0);
1524            }
1525        }
1526    }
1527
1528    /* keywords are located after '@' */
1529    if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1530        i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1531    }
1532
1533    if(i) {
1534        return uloc_openKeywordList(keywords, i, status);
1535    } else {
1536        return NULL;
1537    }
1538}
1539
1540
1541/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* Non-zero when 'options' has at least one bit of 'mask' set.
   Both arguments are parenthesized so that compound expressions such as
   OPTION_SET(a | b, mask) are evaluated as written. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The characters of "i-default"; deliberately not NUL-terminated, always
   used together with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1549
1550/**
1551 * Canonicalize the given localeID, to level 1 or to level 2,
1552 * depending on the options.  To specify level 1, pass in options=0.
1553 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1554 *
1555 * This is the code underlying uloc_getName and uloc_canonicalize.
1556 */
1557static int32_t
1558_canonicalize(const char* localeID,
1559              char* result,
1560              int32_t resultCapacity,
1561              uint32_t options,
1562              UErrorCode* err) {
1563    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1564    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1565    const char* origLocaleID;
1566    const char* keywordAssign = NULL;
1567    const char* separatorIndicator = NULL;
1568    const char* addKeyword = NULL;
1569    const char* addValue = NULL;
1570    char* name;
1571    char* variant = NULL; /* pointer into name, or NULL */
1572    int32_t sawEuro = 0;
1573
1574    if (U_FAILURE(*err)) {
1575        return 0;
1576    }
1577
1578    if (localeID==NULL) {
1579        localeID=uloc_getDefault();
1580    }
1581    origLocaleID=localeID;
1582
1583    /* if we are doing a full canonicalization, then put results in
1584       localeBuffer, if necessary; otherwise send them to result. */
1585    if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1586        (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
1587        name = localeBuffer;
1588        nameCapacity = sizeof(localeBuffer);
1589    } else {
1590        name = result;
1591        nameCapacity = resultCapacity;
1592    }
1593
1594    /* get all pieces, one after another, and separate with '_' */
1595    len=_getLanguage(localeID, name, nameCapacity, &localeID);
1596
1597    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1598        const char *d = uloc_getDefault();
1599
1600        len = uprv_strlen(d);
1601
1602        if (name != NULL) {
1603            uprv_strncpy(name, d, len);
1604        }
1605    } else if(_isIDSeparator(*localeID)) {
1606        const char *scriptID;
1607
1608        ++fieldCount;
1609        if(len<nameCapacity) {
1610            name[len]='_';
1611        }
1612        ++len;
1613
1614        scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1615        if(scriptSize > 0) {
1616            /* Found optional script */
1617            localeID = scriptID;
1618            ++fieldCount;
1619            len+=scriptSize;
1620            if (_isIDSeparator(*localeID)) {
1621                /* If there is something else, then we add the _ */
1622                if(len<nameCapacity) {
1623                    name[len]='_';
1624                }
1625                ++len;
1626            }
1627        }
1628
1629        if (_isIDSeparator(*localeID)) {
1630            len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1631            if(_isIDSeparator(*localeID)) {
1632                ++fieldCount;
1633                if(len<nameCapacity) {
1634                    name[len]='_';
1635                }
1636                ++len;
1637                variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1638                if (variantSize > 0) {
1639                    variant = name+len;
1640                    len += variantSize;
1641                    localeID += variantSize + 1; /* skip '_' and variant */
1642                }
1643            }
1644        }
1645    }
1646
1647    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1648    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1649        UBool done = FALSE;
1650        do {
1651            char c = *localeID;
1652            switch (c) {
1653            case 0:
1654            case '@':
1655                done = TRUE;
1656                break;
1657            default:
1658                if (len<nameCapacity) {
1659                    name[len] = c;
1660                }
1661                ++len;
1662                ++localeID;
1663                break;
1664            }
1665        } while (!done);
1666    }
1667
1668    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1669       After this, localeID either points to '@' or is NULL */
1670    if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1671        keywordAssign = uprv_strchr(localeID, '=');
1672        separatorIndicator = uprv_strchr(localeID, ';');
1673    }
1674
1675    /* Copy POSIX-style variant, if any [mr@FOO] */
1676    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1677        localeID != NULL && keywordAssign == NULL) {
1678        for (;;) {
1679            char c = *localeID;
1680            if (c == 0) {
1681                break;
1682            }
1683            if (len<nameCapacity) {
1684                name[len] = c;
1685            }
1686            ++len;
1687            ++localeID;
1688        }
1689    }
1690
1691    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1692        /* Handle @FOO variant if @ is present and not followed by = */
1693        if (localeID!=NULL && keywordAssign==NULL) {
1694            int32_t posixVariantSize;
1695            /* Add missing '_' if needed */
1696            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1697                do {
1698                    if(len<nameCapacity) {
1699                        name[len]='_';
1700                    }
1701                    ++len;
1702                    ++fieldCount;
1703                } while(fieldCount<2);
1704            }
1705            posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1706                                             (UBool)(variantSize > 0));
1707            if (posixVariantSize > 0) {
1708                if (variant == NULL) {
1709                    variant = name+len;
1710                }
1711                len += posixVariantSize;
1712                variantSize += posixVariantSize;
1713            }
1714        }
1715
1716        /* Check for EURO variants. */
1717        sawEuro = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), "EURO", 4);
1718        len -= sawEuro;
1719        if (sawEuro > 0 && name[len-1] == '_') { /* delete trailing '_' */
1720            --len;
1721        }
1722
1723        /* Look up the ID in the canonicalization map */
1724        for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1725            const char* id = CANONICALIZE_MAP[j].id;
1726            int32_t n = (int32_t)uprv_strlen(id);
1727            if (len == n && uprv_strncmp(name, id, n) == 0) {
1728                if (n == 0 && localeID != NULL) {
1729                    break; /* Don't remap "" if keywords present */
1730                }
1731                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1732                addKeyword = CANONICALIZE_MAP[j].keyword;
1733                addValue = CANONICALIZE_MAP[j].value;
1734                break;
1735            }
1736        }
1737
1738        /* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
1739        if (sawEuro > 0) {
1740            addKeyword = "currency";
1741            addValue = "EUR";
1742        }
1743    }
1744
1745    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1746        if (localeID!=NULL && keywordAssign!=NULL &&
1747            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1748            if(len<nameCapacity) {
1749                name[len]='@';
1750            }
1751            ++len;
1752            ++fieldCount;
1753            len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1754                                addKeyword, addValue, err);
1755        } else if (addKeyword != NULL) {
1756            U_ASSERT(addValue != NULL);
1757            /* inelegant but works -- later make _getKeywords do this? */
1758            len += _copyCount(name+len, nameCapacity-len, "@");
1759            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1760            len += _copyCount(name+len, nameCapacity-len, "=");
1761            len += _copyCount(name+len, nameCapacity-len, addValue);
1762        }
1763    }
1764
1765    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1766        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1767    }
1768
1769    return u_terminateChars(result, resultCapacity, len, err);
1770}
1771
1772/* ### ID parsing API **************************************************/
1773
1774U_CAPI int32_t  U_EXPORT2
1775uloc_getParent(const char*    localeID,
1776               char* parent,
1777               int32_t parentCapacity,
1778               UErrorCode* err)
1779{
1780    const char *lastUnderscore;
1781    int32_t i;
1782
1783    if (U_FAILURE(*err))
1784        return 0;
1785
1786    if (localeID == NULL)
1787        localeID = uloc_getDefault();
1788
1789    lastUnderscore=uprv_strrchr(localeID, '_');
1790    if(lastUnderscore!=NULL) {
1791        i=(int32_t)(lastUnderscore-localeID);
1792    } else {
1793        i=0;
1794    }
1795
1796    if(i>0 && parent != localeID) {
1797        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1798    }
1799    return u_terminateChars(parent, parentCapacity, i, err);
1800}
1801
1802U_CAPI int32_t U_EXPORT2
1803uloc_getLanguage(const char*    localeID,
1804         char* language,
1805         int32_t languageCapacity,
1806         UErrorCode* err)
1807{
1808    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1809    int32_t i=0;
1810
1811    if (err==NULL || U_FAILURE(*err)) {
1812        return 0;
1813    }
1814
1815    if(localeID==NULL) {
1816        localeID=uloc_getDefault();
1817    }
1818
1819    i=_getLanguage(localeID, language, languageCapacity, NULL);
1820    return u_terminateChars(language, languageCapacity, i, err);
1821}
1822
1823U_CAPI int32_t U_EXPORT2
1824uloc_getScript(const char*    localeID,
1825         char* script,
1826         int32_t scriptCapacity,
1827         UErrorCode* err)
1828{
1829    int32_t i=0;
1830
1831    if(err==NULL || U_FAILURE(*err)) {
1832        return 0;
1833    }
1834
1835    if(localeID==NULL) {
1836        localeID=uloc_getDefault();
1837    }
1838
1839    /* skip the language */
1840    _getLanguage(localeID, NULL, 0, &localeID);
1841    if(_isIDSeparator(*localeID)) {
1842        i=_getScript(localeID+1, script, scriptCapacity, NULL);
1843    }
1844    return u_terminateChars(script, scriptCapacity, i, err);
1845}
1846
1847U_CAPI int32_t  U_EXPORT2
1848uloc_getCountry(const char* localeID,
1849            char* country,
1850            int32_t countryCapacity,
1851            UErrorCode* err)
1852{
1853    int32_t i=0;
1854
1855    if(err==NULL || U_FAILURE(*err)) {
1856        return 0;
1857    }
1858
1859    if(localeID==NULL) {
1860        localeID=uloc_getDefault();
1861    }
1862
1863    /* Skip the language */
1864    _getLanguage(localeID, NULL, 0, &localeID);
1865    if(_isIDSeparator(*localeID)) {
1866        const char *scriptID;
1867        /* Skip the script if available */
1868        _getScript(localeID+1, NULL, 0, &scriptID);
1869        if(scriptID != localeID+1) {
1870            /* Found optional script */
1871            localeID = scriptID;
1872        }
1873        if(_isIDSeparator(*localeID)) {
1874            i=_getCountry(localeID+1, country, countryCapacity, NULL);
1875        }
1876    }
1877    return u_terminateChars(country, countryCapacity, i, err);
1878}
1879
1880U_CAPI int32_t  U_EXPORT2
1881uloc_getVariant(const char* localeID,
1882                char* variant,
1883                int32_t variantCapacity,
1884                UErrorCode* err)
1885{
1886    int32_t i=0;
1887
1888    if(err==NULL || U_FAILURE(*err)) {
1889        return 0;
1890    }
1891
1892    if(localeID==NULL) {
1893        localeID=uloc_getDefault();
1894    }
1895
1896    /* Skip the language */
1897    _getLanguage(localeID, NULL, 0, &localeID);
1898    if(_isIDSeparator(*localeID)) {
1899        const char *scriptID;
1900        /* Skip the script if available */
1901        _getScript(localeID+1, NULL, 0, &scriptID);
1902        if(scriptID != localeID+1) {
1903            /* Found optional script */
1904            localeID = scriptID;
1905        }
1906        /* Skip the Country */
1907        if (_isIDSeparator(*localeID)) {
1908            _getCountry(localeID+1, NULL, 0, &localeID);
1909            if(_isIDSeparator(*localeID)) {
1910                i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1911            }
1912        }
1913    }
1914
1915    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1916    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1917/*
1918    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1919        i=_getVariant(localeID+1, '@', variant, variantCapacity);
1920    }
1921*/
1922    return u_terminateChars(variant, variantCapacity, i, err);
1923}
1924
1925U_CAPI int32_t  U_EXPORT2
1926uloc_getName(const char* localeID,
1927             char* name,
1928             int32_t nameCapacity,
1929             UErrorCode* err)
1930{
1931    return _canonicalize(localeID, name, nameCapacity, 0, err);
1932}
1933
1934U_CAPI int32_t  U_EXPORT2
1935uloc_getBaseName(const char* localeID,
1936                 char* name,
1937                 int32_t nameCapacity,
1938                 UErrorCode* err)
1939{
1940    return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1941}
1942
1943U_CAPI int32_t  U_EXPORT2
1944uloc_canonicalize(const char* localeID,
1945                  char* name,
1946                  int32_t nameCapacity,
1947                  UErrorCode* err)
1948{
1949    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1950}
1951
1952U_CAPI const char*  U_EXPORT2
1953uloc_getISO3Language(const char* localeID)
1954{
1955    int16_t offset;
1956    char lang[ULOC_LANG_CAPACITY];
1957    UErrorCode err = U_ZERO_ERROR;
1958
1959    if (localeID == NULL)
1960    {
1961        localeID = uloc_getDefault();
1962    }
1963    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1964    if (U_FAILURE(err))
1965        return "";
1966    offset = _findIndex(LANGUAGES, lang);
1967    if (offset < 0)
1968        return "";
1969    return LANGUAGES_3[offset];
1970}
1971
1972U_CAPI const char*  U_EXPORT2
1973uloc_getISO3Country(const char* localeID)
1974{
1975    int16_t offset;
1976    char cntry[ULOC_LANG_CAPACITY];
1977    UErrorCode err = U_ZERO_ERROR;
1978
1979    if (localeID == NULL)
1980    {
1981        localeID = uloc_getDefault();
1982    }
1983    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
1984    if (U_FAILURE(err))
1985        return "";
1986    offset = _findIndex(COUNTRIES, cntry);
1987    if (offset < 0)
1988        return "";
1989
1990    return COUNTRIES_3[offset];
1991}
1992
1993U_CAPI uint32_t  U_EXPORT2
1994uloc_getLCID(const char* localeID)
1995{
1996    UErrorCode status = U_ZERO_ERROR;
1997    char       langID[ULOC_FULLNAME_CAPACITY];
1998
1999    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2000    if (U_FAILURE(status)) {
2001        return 0;
2002    }
2003
2004    return uprv_convertToLCID(langID, localeID, &status);
2005}
2006
2007U_CAPI int32_t U_EXPORT2
2008uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2009                UErrorCode *status)
2010{
2011    int32_t length;
2012    const char *posix = uprv_convertToPosix(hostid, status);
2013    if (U_FAILURE(*status) || posix == NULL) {
2014        return 0;
2015    }
2016    length = (int32_t)uprv_strlen(posix);
2017    if (length+1 > localeCapacity) {
2018        *status = U_BUFFER_OVERFLOW_ERROR;
2019    }
2020    else {
2021        uprv_strcpy(locale, posix);
2022    }
2023    return length;
2024}
2025
2026/* ### Default locale **************************************************/
2027
2028U_CAPI const char*  U_EXPORT2
2029uloc_getDefault()
2030{
2031    return locale_get_default();
2032}
2033
2034U_CAPI void  U_EXPORT2
2035uloc_setDefault(const char*   newDefaultLocale,
2036             UErrorCode* err)
2037{
2038    if (U_FAILURE(*err))
2039        return;
2040    /* the error code isn't currently used for anything by this function*/
2041
2042    /* propagate change to C++ */
2043    locale_set_default(newDefaultLocale);
2044}
2045
2046/* ### Display name **************************************************/
2047
/*
 * Lookup a resource bundle table item with fallback on the table level.
 * Regular resource bundle lookups perform fallback to parent locale bundles
 * and eventually the root bundle, but only for top-level items.
 * This function takes the name of a top-level table and of an item in that table
 * and performs a lookup of both, falling back until a bundle contains a table
 * with this item.
 *
 * Note: Only the opening of entire bundles falls back through the default locale
 * before root. Once a bundle is open, item lookups do not go through the
 * default locale because that would result in a mix of languages that is
 * unpredictable to the programmer and most likely useless.
 *
 * path         bundle path handed to ures_open() for the initial bundle
 * locale       locale whose bundle is opened first
 * tableKey     key of the top-level table
 * subTableKey  optional key of a table nested in tableKey; NULL for none
 * itemKey      key of the string inside the (sub)table
 * pLength      passed to the string lookups to receive the string length
 * pErrorCode   receives the open/lookup status as described in the body
 *
 * Returns the string that was found, or NULL if the initial bundle cannot
 * be opened; on other failure paths the value is whatever the last lookup
 * returned.
 */
static const UChar *
_res_getTableStringWithFallback(const char *path, const char *locale,
                              const char *tableKey, const char *subTableKey,
                              const char *itemKey,
                              int32_t *pLength,
                              UErrorCode *pErrorCode)
{
    UResourceBundle *rb=NULL, table, subTable;
    const UChar *item=NULL;
    UErrorCode errorCode;
    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};

    /*
     * open the bundle for the current locale
     * this falls back through the locale's chain to root
     */
    errorCode=U_ZERO_ERROR;
    rb=ures_open(path, locale, &errorCode);
    if(U_FAILURE(errorCode)) {
        /* total failure, not even root could be opened */
        *pErrorCode=errorCode;
        return NULL;
    } else if(errorCode==U_USING_DEFAULT_WARNING ||
                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
    ) {
        /* set the "strongest" error code (success->fallback->default->failure) */
        *pErrorCode=errorCode;
    }

    /* Each pass tries the currently open bundle; a pass that ends in a lookup
       failure tries to continue with the bundle named by the "Fallback" item. */
    for(;;){
        /* NOTE(review): both stack objects are re-initialized on every pass
           without closing what a previous pass put into 'table' -- confirm
           that this cannot leak. */
        ures_initStackObject(&table);
        ures_initStackObject(&subTable);
        ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
        if (subTableKey != NULL) {
            /* The sub-table replaces 'table' in place; 'subTable' itself is
               only initialized and closed, never filled. */
            ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
        }
        if(U_SUCCESS(errorCode)){
            item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
            if(U_FAILURE(errorCode)){
                const char* replacement = NULL;
                *pErrorCode = errorCode; /*save the errorCode*/
                /* errorCode is cleared here: unless the replacement lookup
                   below fails, the fallback branch further down is skipped
                   and the loop ends with the saved failure in *pErrorCode. */
                errorCode = U_ZERO_ERROR;
                /* may be a deprecated code */
                if(uprv_strcmp(tableKey, "Countries")==0){
                    replacement =  uloc_getCurrentCountryID(itemKey);
                }else if(uprv_strcmp(tableKey, "Languages")==0){
                    replacement =  uloc_getCurrentLanguageID(itemKey);
                }
                /* pointer comparison is ok since uloc_getCurrentCountryID and
                   uloc_getCurrentLanguageID return the key itself if no
                   replacement is found */
                if(replacement!=NULL && itemKey != replacement){
                    item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
                    if(U_SUCCESS(errorCode)){
                        *pErrorCode = errorCode;
                        break;
                    }
                }
            }else{
                break;
            }
        }

        if(U_FAILURE(errorCode)){

            /* still can't figure out ?.. try the fallback mechanism */
            int32_t len = 0;
            const UChar* fallbackLocale =  NULL;
            *pErrorCode = errorCode;
            errorCode = U_ZERO_ERROR;

            fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
            if(U_FAILURE(errorCode)){
               *pErrorCode = errorCode;
                break;
            }

            /* NOTE(review): 'len' is not checked against the size of
               explicitFallbackName, and no terminator is written after the
               copy (the buffer is only zeroed once, at declaration) --
               confirm that fallback names are always short enough. */
            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);

            /* guard against recursive fallback */
            /* NOTE(review): 'locale' is compared without a NULL check --
               confirm that callers never pass NULL on this path. */
            if(uprv_strcmp(explicitFallbackName, locale)==0){
                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                break;
            }
            ures_close(rb);
            /* NOTE(review): the fallback bundle is opened with a NULL path
               rather than 'path' -- confirm that this is intended. */
            rb = ures_open(NULL, explicitFallbackName, &errorCode);
            if(U_FAILURE(errorCode)){
                *pErrorCode = errorCode;
                break;
            }
            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
        }else{
            break;
        }
    }
    /* done with the locale string - ready to close table and rb */
    ures_close(&subTable);
    ures_close(&table);
    ures_close(rb);
    return item;
}
2170
2171static int32_t
2172_getStringOrCopyKey(const char *path, const char *locale,
2173                    const char *tableKey,
2174                    const char* subTableKey,
2175                    const char *itemKey,
2176                    const char *substitute,
2177                    UChar *dest, int32_t destCapacity,
2178                    UErrorCode *pErrorCode) {
2179    const UChar *s = NULL;
2180    int32_t length = 0;
2181
2182    if(itemKey==NULL) {
2183        /* top-level item: normal resource bundle access */
2184        UResourceBundle *rb;
2185
2186        rb=ures_open(path, locale, pErrorCode);
2187        if(U_SUCCESS(*pErrorCode)) {
2188            s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2189            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2190            ures_close(rb);
2191        }
2192    } else {
2193        /* second-level item, use special fallback */
2194        s=_res_getTableStringWithFallback(path, locale,
2195                                           tableKey,
2196                                           subTableKey,
2197                                           itemKey,
2198                                           &length,
2199                                           pErrorCode);
2200    }
2201    if(U_SUCCESS(*pErrorCode)) {
2202        int32_t copyLength=uprv_min(length, destCapacity);
2203        if(copyLength>0 && s != NULL) {
2204            u_memcpy(dest, s, copyLength);
2205        }
2206    } else {
2207        /* no string from a resource bundle: convert the substitute */
2208        length=(int32_t)uprv_strlen(substitute);
2209        u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
2210        *pErrorCode=U_USING_DEFAULT_WARNING;
2211    }
2212
2213    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2214}
2215
2216static int32_t
2217_getDisplayNameForComponent(const char *locale,
2218                            const char *displayLocale,
2219                            UChar *dest, int32_t destCapacity,
2220                            int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2221                            const char *tag,
2222                            UErrorCode *pErrorCode) {
2223    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
2224    int32_t length;
2225    UErrorCode localStatus;
2226
2227    /* argument checking */
2228    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2229        return 0;
2230    }
2231
2232    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2233        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2234        return 0;
2235    }
2236
2237    localStatus = U_ZERO_ERROR;
2238    length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2239    if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
2240        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2241        return 0;
2242    }
2243    if(length==0) {
2244        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2245    }
2246
2247    return _getStringOrCopyKey(NULL, displayLocale,
2248                               tag, NULL, localeBuffer,
2249                               localeBuffer,
2250                               dest, destCapacity,
2251                               pErrorCode);
2252}
2253
2254U_CAPI int32_t U_EXPORT2
2255uloc_getDisplayLanguage(const char *locale,
2256                        const char *displayLocale,
2257                        UChar *dest, int32_t destCapacity,
2258                        UErrorCode *pErrorCode) {
2259    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2260                uloc_getLanguage, _kLanguages, pErrorCode);
2261}
2262
2263U_CAPI int32_t U_EXPORT2
2264uloc_getDisplayScript(const char* locale,
2265                      const char* displayLocale,
2266                      UChar *dest, int32_t destCapacity,
2267                      UErrorCode *pErrorCode)
2268{
2269    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2270                uloc_getScript, _kScripts, pErrorCode);
2271}
2272
2273U_CAPI int32_t U_EXPORT2
2274uloc_getDisplayCountry(const char *locale,
2275                       const char *displayLocale,
2276                       UChar *dest, int32_t destCapacity,
2277                       UErrorCode *pErrorCode) {
2278    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2279                uloc_getCountry, _kCountries, pErrorCode);
2280}
2281
2282/*
2283 * TODO separate variant1_variant2_variant3...
2284 * by getting each tag's display string and concatenating them with ", "
2285 * in between - similar to uloc_getDisplayName()
2286 */
2287U_CAPI int32_t U_EXPORT2
2288uloc_getDisplayVariant(const char *locale,
2289                       const char *displayLocale,
2290                       UChar *dest, int32_t destCapacity,
2291                       UErrorCode *pErrorCode) {
2292    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2293                uloc_getVariant, _kVariants, pErrorCode);
2294}
2295
2296U_CAPI int32_t U_EXPORT2
2297uloc_getDisplayName(const char *locale,
2298                    const char *displayLocale,
2299                    UChar *dest, int32_t destCapacity,
2300                    UErrorCode *pErrorCode)
2301{
2302    int32_t length, length2, length3 = 0;
2303    UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2304    UEnumeration* keywordEnum = NULL;
2305    int32_t keywordCount = 0;
2306    const char *keyword = NULL;
2307    int32_t keywordLen = 0;
2308    char keywordValue[256];
2309    int32_t keywordValueLen = 0;
2310
2311    /* argument checking */
2312    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2313        return 0;
2314    }
2315
2316    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2317        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2318        return 0;
2319    }
2320
2321    /*
2322     * if there is a language, then write "language (country, variant)"
2323     * otherwise write "country, variant"
2324     */
2325
2326    /* write the language */
2327    length=uloc_getDisplayLanguage(locale, displayLocale,
2328                                   dest, destCapacity,
2329                                   pErrorCode);
2330    hasLanguage= length>0;
2331
2332    if(hasLanguage) {
2333        /* append " (" */
2334        if(length<destCapacity) {
2335            dest[length]=0x20;
2336        }
2337        ++length;
2338        if(length<destCapacity) {
2339            dest[length]=0x28;
2340        }
2341        ++length;
2342    }
2343
2344    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2345        /* keep preflighting */
2346        *pErrorCode=U_ZERO_ERROR;
2347    }
2348
2349    /* append the script */
2350    if(length<destCapacity) {
2351        length2=uloc_getDisplayScript(locale, displayLocale,
2352                                       dest+length, destCapacity-length,
2353                                       pErrorCode);
2354    } else {
2355        length2=uloc_getDisplayScript(locale, displayLocale,
2356                                       NULL, 0,
2357                                       pErrorCode);
2358    }
2359    hasScript= length2>0;
2360    length+=length2;
2361
2362    if(hasScript) {
2363        /* append ", " */
2364        if(length<destCapacity) {
2365            dest[length]=0x2c;
2366        }
2367        ++length;
2368        if(length<destCapacity) {
2369            dest[length]=0x20;
2370        }
2371        ++length;
2372    }
2373
2374    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2375        /* keep preflighting */
2376        *pErrorCode=U_ZERO_ERROR;
2377    }
2378
2379    /* append the country */
2380    if(length<destCapacity) {
2381        length2=uloc_getDisplayCountry(locale, displayLocale,
2382                                       dest+length, destCapacity-length,
2383                                       pErrorCode);
2384    } else {
2385        length2=uloc_getDisplayCountry(locale, displayLocale,
2386                                       NULL, 0,
2387                                       pErrorCode);
2388    }
2389    hasCountry= length2>0;
2390    length+=length2;
2391
2392    if(hasCountry) {
2393        /* append ", " */
2394        if(length<destCapacity) {
2395            dest[length]=0x2c;
2396        }
2397        ++length;
2398        if(length<destCapacity) {
2399            dest[length]=0x20;
2400        }
2401        ++length;
2402    }
2403
2404    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2405        /* keep preflighting */
2406        *pErrorCode=U_ZERO_ERROR;
2407    }
2408
2409    /* append the variant */
2410    if(length<destCapacity) {
2411        length2=uloc_getDisplayVariant(locale, displayLocale,
2412                                       dest+length, destCapacity-length,
2413                                       pErrorCode);
2414    } else {
2415        length2=uloc_getDisplayVariant(locale, displayLocale,
2416                                       NULL, 0,
2417                                       pErrorCode);
2418    }
2419    hasVariant= length2>0;
2420    length+=length2;
2421
2422    if(hasVariant) {
2423        /* append ", " */
2424        if(length<destCapacity) {
2425            dest[length]=0x2c;
2426        }
2427        ++length;
2428        if(length<destCapacity) {
2429            dest[length]=0x20;
2430        }
2431        ++length;
2432    }
2433
2434    keywordEnum = uloc_openKeywords(locale, pErrorCode);
2435
2436    for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2437          if(U_FAILURE(*pErrorCode)){
2438              break;
2439          }
2440          /* the uenum_next returns NUL terminated string */
2441          keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2442          if(length + length3 < destCapacity) {
2443            length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2444          } else {
2445            length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2446          }
2447          if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2448              /* keep preflighting */
2449              *pErrorCode=U_ZERO_ERROR;
2450          }
2451          keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2452          if(keywordValueLen) {
2453            if(length + length3 < destCapacity) {
2454              dest[length + length3] = 0x3D;
2455            }
2456            length3++;
2457            if(length + length3 < destCapacity) {
2458              length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2459            } else {
2460              length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2461            }
2462            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2463                /* keep preflighting */
2464                *pErrorCode=U_ZERO_ERROR;
2465            }
2466          }
2467          if(keywordCount > 1) {
2468            if(length + length3 + 1 < destCapacity && keywordCount) {
2469              dest[length + length3]=0x2c;
2470              dest[length + length3+1]=0x20;
2471            }
2472            length3++; /* ',' */
2473            length3++; /* ' ' */
2474          }
2475    }
2476    uenum_close(keywordEnum);
2477
2478    hasKeywords = length3 > 0;
2479    length += length3;
2480
2481
2482
2483    if ((hasScript && !hasCountry)
2484        || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2485        || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2486        || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2487    {
2488        /* remove ", " or " (" */
2489        length-=2;
2490    }
2491
2492    if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2493        /* append ")" */
2494        if(length<destCapacity) {
2495            dest[length]=0x29;
2496        }
2497        ++length;
2498    }
2499
2500    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2501        /* keep preflighting */
2502        *pErrorCode=U_ZERO_ERROR;
2503    }
2504
2505    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2506}
2507
2508U_CAPI int32_t U_EXPORT2
2509uloc_getDisplayKeyword(const char* keyword,
2510                       const char* displayLocale,
2511                       UChar* dest,
2512                       int32_t destCapacity,
2513                       UErrorCode* status){
2514
2515    /* argument checking */
2516    if(status==NULL || U_FAILURE(*status)) {
2517        return 0;
2518    }
2519
2520    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2521        *status=U_ILLEGAL_ARGUMENT_ERROR;
2522        return 0;
2523    }
2524
2525
2526    /* pass itemKey=NULL to look for a top-level item */
2527    return _getStringOrCopyKey(NULL, displayLocale,
2528                               _kKeys, NULL,
2529                               keyword,
2530                               keyword,
2531                               dest, destCapacity,
2532                               status);
2533
2534}
2535
2536
2537#define UCURRENCY_DISPLAY_NAME_INDEX 1
2538
2539U_CAPI int32_t U_EXPORT2
2540uloc_getDisplayKeywordValue(   const char* locale,
2541                               const char* keyword,
2542                               const char* displayLocale,
2543                               UChar* dest,
2544                               int32_t destCapacity,
2545                               UErrorCode* status){
2546
2547
2548    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2549    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2550    int32_t keywordValueLen =0;
2551
2552    /* argument checking */
2553    if(status==NULL || U_FAILURE(*status)) {
2554        return 0;
2555    }
2556
2557    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2558        *status=U_ILLEGAL_ARGUMENT_ERROR;
2559        return 0;
2560    }
2561
2562    /* get the keyword value */
2563    keywordValue[0]=0;
2564    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2565
2566    /*
2567     * if the keyword is equal to currency .. then to get the display name
2568     * we need to do the fallback ourselves
2569     */
2570    if(uprv_stricmp(keyword, _kCurrency)==0){
2571
2572        int32_t dispNameLen = 0;
2573        const UChar *dispName = NULL;
2574
2575        UResourceBundle *bundle     = ures_open(NULL, displayLocale, status);
2576        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2577        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2578
2579        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2580
2581        /*close the bundles */
2582        ures_close(currency);
2583        ures_close(currencies);
2584        ures_close(bundle);
2585
2586        if(U_FAILURE(*status)){
2587            if(*status == U_MISSING_RESOURCE_ERROR){
2588                /* we just want to write the value over if nothing is available */
2589                *status = U_USING_DEFAULT_WARNING;
2590            }else{
2591                return 0;
2592            }
2593        }
2594
2595        /* now copy the dispName over if not NULL */
2596        if(dispName != NULL){
2597            if(dispNameLen <= destCapacity){
2598                uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2599                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2600            }else{
2601                *status = U_BUFFER_OVERFLOW_ERROR;
2602                return dispNameLen;
2603            }
2604        }else{
2605            /* we have not found the display name for the value .. just copy over */
2606            if(keywordValueLen <= destCapacity){
2607                u_charsToUChars(keywordValue, dest, keywordValueLen);
2608                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2609            }else{
2610                 *status = U_BUFFER_OVERFLOW_ERROR;
2611                return keywordValueLen;
2612            }
2613        }
2614
2615
2616    }else{
2617
2618        return _getStringOrCopyKey(NULL, displayLocale,
2619                                   _kTypes, keyword,
2620                                   keywordValue,
2621                                   keywordValue,
2622                                   dest, destCapacity,
2623                                   status);
2624    }
2625}
2626
2627/* ### Get available **************************************************/
2628
2629static UBool U_CALLCONV uloc_cleanup(void) {
2630    char ** temp;
2631
2632    if (_installedLocales) {
2633        temp = _installedLocales;
2634        _installedLocales = NULL;
2635
2636        _installedLocalesCount = 0;
2637
2638        uprv_free(temp);
2639    }
2640    return TRUE;
2641}
2642
2643static void _load_installedLocales()
2644{
2645    UBool   localesLoaded;
2646
2647    UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded);
2648
2649    if (localesLoaded == FALSE) {
2650        UResourceBundle *index = NULL;
2651        UResourceBundle installed;
2652        UErrorCode status = U_ZERO_ERROR;
2653        char ** temp;
2654        int32_t i = 0;
2655        int32_t localeCount;
2656
2657        ures_initStackObject(&installed);
2658        index = ures_openDirect(NULL, _kIndexLocaleName, &status);
2659        ures_getByKey(index, _kIndexTag, &installed, &status);
2660
2661        if(U_SUCCESS(status)) {
2662            localeCount = ures_getSize(&installed);
2663            temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2664
2665            ures_resetIterator(&installed);
2666            while(ures_hasNext(&installed)) {
2667                ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2668            }
2669            temp[i] = NULL;
2670
2671            umtx_lock(NULL);
2672            if (_installedLocales == NULL)
2673            {
2674                _installedLocales = temp;
2675                _installedLocalesCount = localeCount;
2676                temp = NULL;
2677                ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2678            }
2679            umtx_unlock(NULL);
2680
2681            uprv_free(temp);
2682            ures_close(&installed);
2683        }
2684        ures_close(index);
2685    }
2686}
2687
2688U_CAPI const char* U_EXPORT2
2689uloc_getAvailable(int32_t offset)
2690{
2691
2692    _load_installedLocales();
2693
2694    if (offset > _installedLocalesCount)
2695        return NULL;
2696    return _installedLocales[offset];
2697}
2698
2699U_CAPI int32_t  U_EXPORT2
2700uloc_countAvailable()
2701{
2702    _load_installedLocales();
2703    return _installedLocalesCount;
2704}
2705
2706/**
2707 * Returns a list of all language codes defined in ISO 639.  This is a pointer
2708 * to an array of pointers to arrays of char.  All of these pointers are owned
2709 * by ICU-- do not delete them, and do not write through them.  The array is
2710 * terminated with a null pointer.
2711 */
2712U_CAPI const char* const*  U_EXPORT2
2713uloc_getISOLanguages()
2714{
2715    return LANGUAGES;
2716}
2717
2718/**
2719 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2720 * pointer to an array of pointers to arrays of char.  All of these pointers are
2721 * owned by ICU-- do not delete them, and do not write through them.  The array is
2722 * terminated with a null pointer.
2723 */
2724U_CAPI const char* const*  U_EXPORT2
2725uloc_getISOCountries()
2726{
2727    return COUNTRIES;
2728}
2729
2730
/* this function to be moved into cstring.c later */

/*
 * Decimal separator that the C runtime's sprintf() currently emits;
 * 0 until _uloc_strtod() has probed it.
 */
static char gDecimal = 0;

/*
 * strtod() replacement that always accepts '.' as the decimal separator,
 * even when the C runtime has been switched to a different one.
 *
 * start: text to parse
 * end:   if not NULL, receives a pointer into `start` just past the parsed
 *        number
 * When the runtime separator is not '.', only the first 29 characters of
 * `start` are examined.
 */
static double
_uloc_strtod(const char *start, char **end) {
    char scratch[30];
    char *dot;
    char *stop;
    double value;

    if (gDecimal == 0) {
        /* format 1.0 once and remember which separator the runtime printed */
        char probe[5];
        sprintf(probe, "%+1.1f", 1.0);
        gDecimal = probe[2];
    }

    if (gDecimal == '.') {
        /* runtime agrees with us: hand the text over unchanged */
        return uprv_strtod(start, end);
    }

    /* work on a private copy so that the '.' can be swapped */
    uprv_strncpy(scratch, start, 29);
    scratch[29] = 0;

    dot = uprv_strchr(scratch, '.');
    if (dot == NULL) {
        /* no decimal point, nothing needs translating */
        return uprv_strtod(start, end);
    }
    *dot = gDecimal;

    value = uprv_strtod(scratch, &stop);
    if (end != NULL) {
        /* map the stop position back onto the caller's string
           (cast away const to follow the uprv_strtod API) */
        *end = (char*)(start + (stop - scratch));
    }
    return value;
}
2769
/* One entry parsed from an Accept-Language list. */
typedef struct {
    float q;        /* quality weight of the entry; 1.0 when none was given */
    int32_t dummy;  /* always set to 0, so the sort never copies uninitialized memory */
    char *locale;   /* heap-allocated locale ID of the entry */
} _acceptLangItem;
2775
2776static int32_t U_CALLCONV
2777uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2778{
2779    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2780    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2781
2782    int32_t rc = 0;
2783    if(bb->q < aa->q) {
2784        rc = -1;  /* A > B */
2785    } else if(bb->q > aa->q) {
2786        rc = 1;   /* A < B */
2787    } else {
2788        rc = 0;   /* A = B */
2789    }
2790
2791    if(rc==0) {
2792        rc = uprv_stricmp(aa->locale, bb->locale);
2793    }
2794
2795#if defined(ULOC_DEBUG)
2796    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2797    aa->locale, aa->q,
2798    bb->locale, bb->q,
2799    rc);*/
2800#endif
2801
2802    return rc;
2803}
2804
/*
Sample HTTP Accept-Language header value (comma-separated language ranges, each with an optional ";q=" weight):

mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
*/
2808
2809U_CAPI int32_t U_EXPORT2
2810uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2811                            const char *httpAcceptLanguage,
2812                            UEnumeration* availableLocales,
2813                            UErrorCode *status)
2814{
2815    _acceptLangItem *j;
2816    _acceptLangItem smallBuffer[30];
2817    char **strs;
2818    char tmp[ULOC_FULLNAME_CAPACITY +1];
2819    int32_t n = 0;
2820    const char *itemEnd;
2821    const char *paramEnd;
2822    const char *s;
2823    const char *t;
2824    int32_t res;
2825    int32_t i;
2826    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2827    int32_t jSize;
2828
2829    j = smallBuffer;
2830    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2831    if(U_FAILURE(*status)) {
2832        return -1;
2833    }
2834
2835    for(s=httpAcceptLanguage;s&&*s;) {
2836        while(isspace(*s)) /* eat space at the beginning */
2837            s++;
2838        itemEnd=uprv_strchr(s,',');
2839        paramEnd=uprv_strchr(s,';');
2840        if(!itemEnd) {
2841            itemEnd = httpAcceptLanguage+l; /* end of string */
2842        }
2843        if(paramEnd && paramEnd<itemEnd) {
2844            /* semicolon (;) is closer than end (,) */
2845            t = paramEnd+1;
2846            if(*t=='q') {
2847                t++;
2848            }
2849            while(isspace(*t)) {
2850                t++;
2851            }
2852            if(*t=='=') {
2853                t++;
2854            }
2855            while(isspace(*t)) {
2856                t++;
2857            }
2858            j[n].q = (float)_uloc_strtod(t,NULL);
2859        } else {
2860            /* no semicolon - it's 1.0 */
2861            j[n].q = 1.0f;
2862            paramEnd = itemEnd;
2863        }
2864        j[n].dummy=0;
2865        /* eat spaces prior to semi */
2866        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2867            ;
2868        j[n].locale = uprv_strndup(s,(int32_t)((t+1)-s));
2869        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2870        if(strcmp(j[n].locale,tmp)) {
2871            uprv_free(j[n].locale);
2872            j[n].locale=uprv_strdup(tmp);
2873        }
2874#if defined(ULOC_DEBUG)
2875        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2876#endif
2877        n++;
2878        s = itemEnd;
2879        while(*s==',') { /* eat duplicate commas */
2880            s++;
2881        }
2882        if(n>=jSize) {
2883            if(j==smallBuffer) {  /* overflowed the small buffer. */
2884                j = uprv_malloc(sizeof(j[0])*(jSize*2));
2885                if(j!=NULL) {
2886                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2887                }
2888#if defined(ULOC_DEBUG)
2889                fprintf(stderr,"malloced at size %d\n", jSize);
2890#endif
2891            } else {
2892                j = uprv_realloc(j, sizeof(j[0])*jSize*2);
2893#if defined(ULOC_DEBUG)
2894                fprintf(stderr,"re-alloced at size %d\n", jSize);
2895#endif
2896            }
2897            jSize *= 2;
2898            if(j==NULL) {
2899                *status = U_MEMORY_ALLOCATION_ERROR;
2900                return -1;
2901            }
2902        }
2903    }
2904    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2905    if(U_FAILURE(*status)) {
2906        if(j != smallBuffer) {
2907#if defined(ULOC_DEBUG)
2908            fprintf(stderr,"freeing j %p\n", j);
2909#endif
2910            uprv_free(j);
2911        }
2912        return -1;
2913    }
2914    strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
2915    for(i=0;i<n;i++) {
2916#if defined(ULOC_DEBUG)
2917        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2918#endif
2919        strs[i]=j[i].locale;
2920    }
2921    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2922        (const char**)strs, n, availableLocales, status);
2923    for(i=0;i<n;i++) {
2924        uprv_free(strs[i]);
2925    }
2926    uprv_free(strs);
2927    if(j != smallBuffer) {
2928#if defined(ULOC_DEBUG)
2929        fprintf(stderr,"freeing j %p\n", j);
2930#endif
2931        uprv_free(j);
2932    }
2933    return res;
2934}
2935
2936
2937U_CAPI int32_t U_EXPORT2
2938uloc_acceptLanguage(char *result, int32_t resultAvailable,
2939                    UAcceptResult *outResult, const char **acceptList,
2940                    int32_t acceptListCount,
2941                    UEnumeration* availableLocales,
2942                    UErrorCode *status)
2943{
2944    int32_t i,j;
2945    int32_t len;
2946    int32_t maxLen=0;
2947    char tmp[ULOC_FULLNAME_CAPACITY+1];
2948    const char *l;
2949    char **fallbackList;
2950    if(U_FAILURE(*status)) {
2951        return -1;
2952    }
2953    fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
2954    if(fallbackList==NULL) {
2955        *status = U_MEMORY_ALLOCATION_ERROR;
2956        return -1;
2957    }
2958    for(i=0;i<acceptListCount;i++) {
2959#if defined(ULOC_DEBUG)
2960        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2961#endif
2962        while((l=uenum_next(availableLocales, NULL, status))) {
2963#if defined(ULOC_DEBUG)
2964            fprintf(stderr,"  %s\n", l);
2965#endif
2966            len = (int32_t)uprv_strlen(l);
2967            if(!uprv_strcmp(acceptList[i], l)) {
2968                if(outResult) {
2969                    *outResult = ULOC_ACCEPT_VALID;
2970                }
2971#if defined(ULOC_DEBUG)
2972                fprintf(stderr, "MATCH! %s\n", l);
2973#endif
2974                if(len>0) {
2975                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2976                }
2977                for(j=0;j<i;j++) {
2978                    uprv_free(fallbackList[j]);
2979                }
2980                uprv_free(fallbackList);
2981                return u_terminateChars(result, resultAvailable, len, status);
2982            }
2983            if(len>maxLen) {
2984                maxLen = len;
2985            }
2986        }
2987        uenum_reset(availableLocales, status);
2988        /* save off parent info */
2989        if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
2990            fallbackList[i] = uprv_strdup(tmp);
2991        } else {
2992            fallbackList[i]=0;
2993        }
2994    }
2995
2996    for(maxLen--;maxLen>0;maxLen--) {
2997        for(i=0;i<acceptListCount;i++) {
2998            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2999#if defined(ULOC_DEBUG)
3000                fprintf(stderr,"Try: [%s]", fallbackList[i]);
3001#endif
3002                while((l=uenum_next(availableLocales, NULL, status))) {
3003#if defined(ULOC_DEBUG)
3004                    fprintf(stderr,"  %s\n", l);
3005#endif
3006                    len = (int32_t)uprv_strlen(l);
3007                    if(!uprv_strcmp(fallbackList[i], l)) {
3008                        if(outResult) {
3009                            *outResult = ULOC_ACCEPT_FALLBACK;
3010                        }
3011#if defined(ULOC_DEBUG)
3012                        fprintf(stderr, "fallback MATCH! %s\n", l);
3013#endif
3014                        if(len>0) {
3015                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3016                        }
3017                        for(j=0;j<acceptListCount;j++) {
3018                            uprv_free(fallbackList[j]);
3019                        }
3020                        uprv_free(fallbackList);
3021                        return u_terminateChars(result, resultAvailable, len, status);
3022                    }
3023                }
3024                uenum_reset(availableLocales, status);
3025
3026                if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3027                    uprv_free(fallbackList[i]);
3028                    fallbackList[i] = uprv_strdup(tmp);
3029                } else {
3030                    uprv_free(fallbackList[i]);
3031                    fallbackList[i]=0;
3032                }
3033            }
3034        }
3035        if(outResult) {
3036            *outResult = ULOC_ACCEPT_FAILED;
3037        }
3038    }
3039    for(i=0;i<acceptListCount;i++) {
3040        uprv_free(fallbackList[i]);
3041    }
3042    uprv_free(fallbackList);
3043    return -1;
3044}
3045
3046/*eof*/
3047