1/*
2**********************************************************************
3*   Copyright (C) 1997-2009, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*
7* File ULOC.CPP
8*
9* Modification History:
10*
11*   Date        Name        Description
12*   04/01/97    aliu        Creation.
13*   08/21/98    stephen     JDK 1.2 sync
14*   12/08/98    rtg         New Locale implementation and C API
15*   03/15/99    damiba      overhaul.
16*   04/06/99    stephen     changed setDefault() to realloc and copy
17*   06/14/99    stephen     Changed calls to ures_open for new params
18*   07/21/99    stephen     Modified setDefault() to propagate to C++
19*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
20*                           brought canonicalization code into line with spec
21*****************************************************************************/
22
23/*
24   POSIX's locale format, from putil.c: [no spaces]
25
26     ll [ _CC ] [ . MM ] [ @ VV]
27
28     l = lang, C = ctry, M = charmap, V = variant
29*/
30
31#include "unicode/utypes.h"
32#include "unicode/ustring.h"
33#include "unicode/uloc.h"
34#include "unicode/ures.h"
35
36#include "putilimp.h"
37#include "ustr_imp.h"
38#include "ulocimp.h"
39#include "uresimp.h"
40#include "umutex.h"
41#include "cstring.h"
42#include "cmemory.h"
43#include "ucln_cmn.h"
44#include "locmap.h"
45#include "uarrsort.h"
46#include "uenumimp.h"
47#include "uassert.h"
48
49#include <stdio.h> /* for sprintf */
50
51/* ### Declarations **************************************************/
52
53/* Locale stuff from locid.cpp */
54U_CFUNC void locale_set_default(const char *id);
55U_CFUNC const char *locale_get_default(void);
56U_CFUNC int32_t
57locale_getKeywords(const char *localeID,
58            char prev,
59            char *keywords, int32_t keywordCapacity,
60            char *values, int32_t valuesCapacity, int32_t *valLen,
61            UBool valuesToo,
62            UErrorCode *status);
63
64/* ### Constants **************************************************/
65
66/* These strings describe the resources we attempt to load from
67 the locale ResourceBundle data file.*/
/* Resource table names. */
static const char _kLanguages[]  = "Languages";
static const char _kScripts[]    = "Scripts";
static const char _kCountries[]  = "Countries";
static const char _kVariants[]   = "Variants";
static const char _kKeys[]       = "Keys";
static const char _kTypes[]      = "Types";

/* Bundle names and tags. */
static const char _kIndexLocaleName[] = "res_index";
static const char _kRootName[]        = "root";
static const char _kIndexTag[]        = "InstalledLocales";

/* Currency keyword and its resource table. */
static const char _kCurrency[]   = "currency";
static const char _kCurrencies[] = "Currencies";

/* Installed-locale list and its element count; both start out empty. */
static char   **_installedLocales      = NULL;
static int32_t  _installedLocalesCount = 0;
81
82/* ### Data tables **************************************************/
83
84/**
85 * Table of language codes, both 2- and 3-letter, with preference
86 * given to 2-letter codes where possible.  Includes 3-letter codes
87 * that lack a 2-letter equivalent.
88 *
89 * This list must be in sorted order.  This list is returned directly
90 * to the user by some API.
91 *
92 * This list must be kept in sync with LANGUAGES_3, with corresponding
93 * entries matched.
94 *
95 * This table should be terminated with a NULL entry, followed by a
96 * second list, and another NULL entry.  The first list is visible to
97 * user code when this array is returned by API.  The second list
98 * contains codes we support, but do not expose through user API.
99 *
100 * Notes
101 *
102 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
103 * include the revisions up to 2001/7/27 *CWB*
104 *
105 * The 3 character codes are the terminology codes like RFC 3066.  This
106 * is compatible with prior ICU codes
107 *
108 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
109 * table but now at the end of the table because 3 character codes are
110 * duplicates.  This avoids bad searches going from 3 to 2 character
111 * codes.
112 *
113 * The range qaa-qtz is reserved for local use
114 */
static const char * const LANGUAGES[] = {
    /* a */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa", "afh",
    "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp",
    "apa", "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast", "ath",
    "aus", "av",  "awa", "ay",  "az",
    /* b */
    "ba",  "bad", "bai", "bal", "ban", "bas", "bat", "be",  "bej", "bem",
    "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin", "bla", "bm",  "bn",
    "bnt", "bo",  "br",  "bra", "bs",  "btk", "bua", "bug", "byn",
    /* c */
    "ca",  "cad", "cai", "car", "cau", "cch", "ce",  "ceb", "cel", "ch",
    "chb", "chg", "chk", "chm", "chn", "cho", "chp", "chr", "chy", "cmc",
    "co",  "cop", "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb",
    "cu",  "cus", "cv",  "cy",
    /* d */
    "da",  "dak", "dar", "day", "de",  "del", "den", "dgr", "din", "doi",
    "dra", "dsb", "dua", "dum", "dv",  "dyu", "dz",
    /* e */
    "ee",  "efi", "egy", "eka", "el",  "elx", "en",  "enm", "eo",  "es",
    "et",  "eu",  "ewo",
    /* f */
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",
    "fr",  "frm", "fro", "frr", "frs", "fur", "fy",
    /* g */
    "ga",  "gaa", "gay", "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh",
    "gn",  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",
    "gwi",
    /* h */
    "ha",  "hai", "haw", "he",  "hi",  "hil", "him", "hit", "hmn", "ho",
    "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",
    /* i */
    "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",  "ilo", "inc",
    "ine", "inh", "io",  "ira", "iro", "is",  "it",  "iu",
    /* j */
    "ja",  "jbo", "jpr", "jrb", "jv",
    /* k */
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg",
    "kfo", "kg",  "kha", "khi", "kho", "ki",  "kj",  "kk",  "kl",  "km",
    "kmb", "kn",  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro",
    "kru", "ks",  "ku",  "kum", "kut", "kv",  "kw",  "ky",
    /* l */
    "la",  "lad", "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",
    "lol", "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus", "lv",
    /* m */
    "mad", "mag", "mai", "mak", "man", "map", "mas", "mdf", "mdr", "men",
    "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min", "mis", "mk",  "mkh",
    "ml",  "mn",  "mnc", "mni", "mno", "mo",  "moh", "mos", "mr",  "ms",
    "mt",  "mul", "mun", "mus", "mwl", "mwr", "my",  "myn", "myv",
    /* n */
    "na",  "nah", "nai", "nap", "nb",  "nd",  "nds", "ne",  "new", "ng",
    "nia", "nic", "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",
    "nso", "nub", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    /* o */
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota", "oto",
    /* p */
    "pa",  "paa", "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
    "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",
    /* q */
    "qu",
    /* r */
    "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom", "ru",  "rup",
    "rw",
    /* s */
    "sa",  "sad", "sah", "sai", "sal", "sam", "sas", "sat", "sc",  "scn",
    "sco", "sd",  "se",  "sel", "sem", "sg",  "sga", "sgn", "shn", "si",
    "sid", "sio", "sit", "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj",
    "smn", "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",  "srn",
    "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux", "sv",  "sw",
    "syc", "syr",
    /* t */
    "ta",  "tai", "te",  "tem", "ter", "tet", "tg",  "th",  "ti",  "tig",
    "tiv", "tk",  "tkl", "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog",
    "tpi", "tr",  "trv", "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl",
    "tw",  "ty",  "tyv",
    /* u */
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    /* v */
    "vai", "ve",  "vi",  "vo",  "vot",
    /* w */
    "wa",  "wak", "wal", "war", "was", "wen", "wo",
    /* x */
    "xal", "xh",
    /* y */
    "yao", "yap", "yi",  "yo",  "ypk",
    /* z */
    "za",  "zap", "zbl", "zen", "zh",  "znd", "zu",  "zun", "zxx", "zza",
    NULL,   /* end of the sorted list */
    /* obsolete language codes */
    "in",  "iw",  "ji",  "jw",  "sh",
    NULL    /* end of the obsolete list */
};
/* Deprecated language codes; REPLACEMENT_LANGUAGES holds the matching entry at the same index. */
static const char* const DEPRECATED_LANGUAGES[]={
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
/* Index-aligned with DEPRECATED_LANGUAGES: in->id, iw->he, ji->yi, jw->jv. */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id",
    "he",
    "yi",
    "jv",
    NULL,
    NULL
};
185
186/**
187 * Table of 3-letter language codes.
188 *
189 * This is a lookup table used to convert 3-letter language codes to
190 * their 2-letter equivalent, where possible.  It must be kept in sync
191 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
192 * same language as LANGUAGES_3[i].  The commented-out lines are
193 * copied from LANGUAGES to make eyeballing this baby easier.
194 *
195 * Where a 3-letter language code has no 2-letter equivalent, the
196 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
197 *
198 * This table should be terminated with a NULL entry, followed by a
199 * second list, and another NULL entry.  The two lists correspond to
200 * the two lists in LANGUAGES.
201 */
/*
 * Index-aligned with LANGUAGES: LANGUAGES_3[i] is the 3-letter code for
 * LANGUAGES[i].  Every entry here must be a 3-letter code; in particular
 * the 2-letter code "kg" (Kongo) pairs with "kon".
 */
static const char * const LANGUAGES_3[] = {
/*  "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "af",  "afa",    */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa",
/*  "afh", "ain", "ak",  "akk", "ale", "alg", "alt", "am",  "an",  "ang", "anp", "apa",    */
    "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa",
/*  "ar",  "arc", "arn", "arp", "art", "arw", "as",  "ast",    */
    "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast",
/*  "ath", "aus", "av",  "awa", "ay",  "az",  "ba",  "bad",    */
    "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad",
/*  "bai", "bal", "ban", "bas", "bat", "be",  "bej",    */
    "bai", "bal", "ban", "bas", "bat", "bel", "bej",
/*  "bem", "ber", "bg",  "bh",  "bho", "bi",  "bik", "bin",    */
    "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin",
/*  "bla", "bm",  "bn",  "bnt", "bo",  "br",  "bra", "bs",     */
    "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos",
/*  "btk", "bua", "bug", "byn", "ca",  "cad", "cai", "car", "cau",    */
    "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau",
/*  "cch", "ce",  "ceb", "cel", "ch",  "chb", "chg", "chk", "chm",    */
    "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm",
/*  "chn", "cho", "chp", "chr", "chy", "cmc", "co",  "cop",    */
    "chn", "cho", "chp", "chr", "chy", "cmc", "cos", "cop",
/*  "cpe", "cpf", "cpp", "cr",  "crh", "crp", "cs",  "csb", "cu",  "cus",    */
    "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus",
/*  "cv",  "cy",  "da",  "dak", "dar", "day", "de",  "del", "den",    */
    "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den",
/*  "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv",  "dyu",    */
    "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu",
/*  "dz",  "ee",  "efi", "egy", "eka", "el",  "elx", "en",     */
    "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng",
/*  "enm", "eo",  "es",  "et",  "eu",  "ewo", "fa",     */
    "enm", "epo", "spa", "est", "eus", "ewo", "fas",
/*  "fan", "fat", "ff",  "fi",  "fil", "fiu", "fj",  "fo",  "fon",    */
    "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon",
/*  "fr",  "frm", "fro", "frr", "frs", "fur", "fy",  "ga",  "gaa", "gay",    */
    "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay",
/*  "gba", "gd",  "gem", "gez", "gil", "gl",  "gmh", "gn",     */
    "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn",
/*  "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "gv",     */
    "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv",
/*  "gwi", "ha",  "hai", "haw", "he",  "hi",  "hil", "him",    */
    "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him",
/*  "hit", "hmn", "ho",  "hr",  "hsb", "ht",  "hu",  "hup", "hy",  "hz",     */
    "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her",
/*  "ia",  "iba", "id",  "ie",  "ig",  "ii",  "ijo", "ik",     */
    "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk",
/*  "ilo", "inc", "ine", "inh", "io",  "ira", "iro", "is",  "it",      */
    "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita",
/*  "iu",  "ja",  "jbo", "jpr", "jrb", "jv",  "ka",  "kaa", "kab",   */
    "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab",
/*  "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg",  "kha", "khi",*/
    "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kon", "kha", "khi",
/*  "kho", "ki",  "kj",  "kk",  "kl",  "km",  "kmb", "kn",     */
    "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan",
/*  "ko",  "kok", "kos", "kpe", "kr",  "krc", "krl", "kro", "kru", "ks",     */
    "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas",
/*  "ku",  "kum", "kut", "kv",  "kw",  "ky",  "la",  "lad",    */
    "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad",
/*  "lah", "lam", "lb",  "lez", "lg",  "li",  "ln",  "lo",  "lol",    */
    "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol",
/*  "loz", "lt",  "lu",  "lua", "lui", "lun", "luo", "lus",    */
    "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus",
/*  "lv",  "mad", "mag", "mai", "mak", "man", "map", "mas",    */
    "lav", "mad", "mag", "mai", "mak", "man", "map", "mas",
/*  "mdf", "mdr", "men", "mfe", "mg",  "mga", "mh",  "mi",  "mic", "min",    */
    "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min",
/*  "mis", "mk",  "mkh", "ml",  "mn",  "mnc", "mni", "mno",    */
    "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno",
/*  "mo",  "moh", "mos", "mr",  "ms",  "mt",  "mul", "mun",    */
    "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun",
/*  "mus", "mwl", "mwr", "my",  "myn", "myv", "na",  "nah", "nai", "nap",    */
    "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap",
/*  "nb",  "nd",  "nds", "ne",  "new", "ng",  "nia", "nic",    */
    "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic",
/*  "niu", "nl",  "nn",  "no",  "nog", "non", "nqo", "nr",  "nso", "nub",    */
    "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub",
/*  "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi", "oc",  "oj",     */
    "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji",
/*  "om",  "or",  "os",  "osa", "ota", "oto", "pa",  "paa",    */
    "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa",
/*  "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",    */
    "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn",
/*  "pi",  "pl",  "pon", "pra", "pro", "ps",  "pt",  "qu",     */
    "pli", "pol", "pon", "pra", "pro", "pus", "por", "que",
/*  "raj", "rap", "rar", "rm",  "rn",  "ro",  "roa", "rom",    */
    "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom",
/*  "ru",  "rup", "rw",  "sa",  "sad", "sah", "sai", "sal", "sam",    */
    "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam",
/*  "sas", "sat", "sc",  "scn", "sco", "sd",  "se",  "sel", "sem",    */
    "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem",
/*  "sg",  "sga", "sgn", "shn", "si",  "sid", "sio", "sit",    */
    "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit",
/*  "sk",  "sl",  "sla", "sm",  "sma", "smi", "smj", "smn",    */
    "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn",
/*  "sms", "sn",  "snk", "so",  "sog", "son", "sq",  "sr",     */
    "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp",
/*  "srn", "srr", "ss",  "ssa", "st",  "su",  "suk", "sus", "sux",    */
    "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux",
/*  "sv",  "sw",  "syc", "syr", "ta",  "tai", "te",  "tem", "ter",    */
    "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter",
/*  "tet", "tg",  "th",  "ti",  "tig", "tiv", "tk",  "tkl",    */
    "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl",
/*  "tl",  "tlh", "tli", "tmh", "tn",  "to",  "tog", "tpi", "tr", "trv",    */
    "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv",
/*  "ts",  "tsi", "tt",  "tum", "tup", "tut", "tvl", "tw",     */
    "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi",
/*  "ty",  "tyv", "udm", "ug",  "uga", "uk",  "umb", "und", "ur",     */
    "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd",
/*  "uz",  "vai", "ve",  "vi",  "vo",  "vot", "wa",  "wak",    */
    "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak",
/*  "wal", "war", "was", "wen", "wo",  "xal", "xh",  "yao", "yap",    */
    "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap",
/*  "yi",  "yo",  "ypk", "za",  "zap", "zbl", "zen", "zh",  "znd",    */
    "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd",
/*  "zu",  "zun", "zxx", "zza",                                         */
    "zul", "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
322
323/**
324 * Table of 2-letter country codes.
325 *
326 * This list must be in sorted order.  This list is returned directly
327 * to the user by some API.
328 *
329 * This list must be kept in sync with COUNTRIES_3, with corresponding
330 * entries matched.
331 *
332 * This table should be terminated with a NULL entry, followed by a
333 * second list, and another NULL entry.  The first list is visible to
334 * user code when this array is returned by API.  The second list
335 * contains codes we support, but do not expose through user API.
336 *
337 * Notes:
338 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html.
 * Added new codes, keeping the old ones for compatibility.
 * Updated to include the 1999/12/03 revisions. *CWB*
343 *
344 * RO(ROM) is now RO(ROU) according to
345 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
346 */
static const char * const COUNTRIES[] = {
    /* A */
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",  "AO",  "AQ",
    "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    /* B */
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",  "BJ",  "BL",
    "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",  "BW",  "BY",  "BZ",
    /* C */
    "CA",  "CC",  "CD",  "CF",  "CG",  "CH",  "CI",  "CK",  "CL",  "CM",
    "CN",  "CO",  "CR",  "CU",  "CV",  "CX",  "CY",  "CZ",
    /* D */
    "DE",  "DJ",  "DK",  "DM",  "DO",  "DZ",
    /* E */
    "EC",  "EE",  "EG",  "EH",  "ER",  "ES",  "ET",
    /* F */
    "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    /* G */
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",  "GM",
    "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",  "GW",  "GY",
    /* H */
    "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    /* I */
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",  "IT",
    /* J */
    "JE",  "JM",  "JO",  "JP",
    /* K */
    "KE",  "KG",  "KH",  "KI",  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",
    "KZ",
    /* L */
    "LA",  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",  "LV",
    "LY",
    /* M */
    "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",  "ML",  "MM",
    "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",  "MT",  "MU",  "MV",  "MW",
    "MX",  "MY",  "MZ",
    /* N */
    "NA",  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",  "NR",
    "NU",  "NZ",
    /* O */
    "OM",
    /* P */
    "PA",  "PE",  "PF",  "PG",  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",
    "PS",  "PT",  "PW",  "PY",
    /* Q */
    "QA",
    /* R */
    "RE",  "RO",  "RS",  "RU",  "RW",
    /* S */
    "SA",  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",  "SK",
    "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",  "SY",  "SZ",
    /* T */
    "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",  "TK",  "TL",  "TM",  "TN",
    "TO",  "TR",  "TT",  "TV",  "TW",  "TZ",
    /* U */
    "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    /* V */
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",
    /* W */
    "WF",  "WS",
    /* Y */
    "YE",  "YT",
    /* Z */
    "ZA",  "ZM",  "ZW",
    NULL,   /* end of the sorted list */
    /* obsolete country codes */
    "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",
    NULL    /* end of the obsolete list */
};
382
/* Deprecated country codes; REPLACEMENT_COUNTRIES holds the matching entry at the same index. */
static const char* const DEPRECATED_COUNTRIES[] ={
    "BU", "CS", "DY", "FX", "HV",
    "NH", "RH", "TP", "YU", "ZR",
    NULL, NULL
};
/* Replacement country codes, index-aligned with DEPRECATED_COUNTRIES. */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "BU", "CS", "DY", "FX", "HV", */
    "MM", "RS", "BJ", "FR", "BF",
/*  "NH", "RH", "TP", "YU", "ZR"  */
    "VU", "ZW", "TL", "RS", "CD",
    NULL, NULL
};
390
391/**
392 * Table of 3-letter country codes.
393 *
394 * This is a lookup table used to convert 3-letter country codes to
395 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
396 * For all valid i, COUNTRIES[i] must refer to the same country as
397 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
398 * to make eyeballing this baby easier.
399 *
400 * This table should be terminated with a NULL entry, followed by a
401 * second list, and another NULL entry.  The two lists correspond to
402 * the two lists in COUNTRIES.
403 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  "AN",  "AO",  "AQ",  */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT", "AGO", "ATA",
/*  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",  */
    "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",  "BJ",  "BL",  */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", "BEN", "BLM",
/*  "BM",  "BN",  "BO",  "BR",  "BS",  "BT",  "BV",  "BW",  "BY",  "BZ",  */
    "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT", "BWA", "BLR", "BLZ",
/*  "CA",  "CC",  "CD",  "CF",  "CG",  "CH",  "CI",  "CK",  "CL",  "CM",  */
    "CAN", "CCK", "COD", "CAF", "COG", "CHE", "CIV", "COK", "CHL", "CMR",
/*  "CN",  "CO",  "CR",  "CU",  "CV",  "CX",  "CY",  "CZ",  */
    "CHN", "COL", "CRI", "CUB", "CPV", "CXR", "CYP", "CZE",
/*  "DE",  "DJ",  "DK",  "DM",  "DO",  "DZ",  */
    "DEU", "DJI", "DNK", "DMA", "DOM", "DZA",
/*  "EC",  "EE",  "EG",  "EH",  "ER",  "ES",  "ET",  */
    "ECU", "EST", "EGY", "ESH", "ERI", "ESP", "ETH",
/*  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",  */
    "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",  "GM",  */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", "GMB",
/*  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",  "GW",  "GY",  */
    "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", "GNB", "GUY",
/*  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",  */
    "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",  "IT",  */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", "ITA",
/*  "JE",  "JM",  "JO",  "JP",  */
    "JEY", "JAM", "JOR", "JPN",
/*  "KE",  "KG",  "KH",  "KI",  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  */
    "KEN", "KGZ", "KHM", "KIR", "COM", "KNA", "PRK", "KOR", "KWT", "CYM",
/*  "KZ",  */
    "KAZ",
/*  "LA",  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",  "LV",  */
    "LAO", "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", "LVA",
/*  "LY",  */
    "LBY",
/*  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",  "ML",  "MM",  */
    "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", "MLI", "MMR",
/*  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",  "MT",  "MU",  "MV",  "MW",  */
    "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", "MLT", "MUS", "MDV", "MWI",
/*  "MX",  "MY",  "MZ",  */
    "MEX", "MYS", "MOZ",
/*  "NA",  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",  "NR",  */
    "NAM", "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", "NRU",
/*  "NU",  "NZ",  */
    "NIU", "NZL",
/*  "OM",  */
    "OMN",
/*  "PA",  "PE",  "PF",  "PG",  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  */
    "PAN", "PER", "PYF", "PNG", "PHL", "PAK", "POL", "SPM", "PCN", "PRI",
/*  "PS",  "PT",  "PW",  "PY",  */
    "PSE", "PRT", "PLW", "PRY",
/*  "QA",  */
    "QAT",
/*  "RE",  "RO",  "RS",  "RU",  "RW",  */
    "REU", "ROU", "SRB", "RUS", "RWA",
/*  "SA",  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",  "SK",  */
    "SAU", "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", "SVK",
/*  "SL",  "SM",  "SN",  "SO",  "SR",  "ST",  "SV",  "SY",  "SZ",  */
    "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV", "SYR", "SWZ",
/*  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",  "TK",  "TL",  "TM",  "TN",  */
    "TCA", "TCD", "ATF", "TGO", "THA", "TJK", "TKL", "TLS", "TKM", "TUN",
/*  "TO",  "TR",  "TT",  "TV",  "TW",  "TZ",  */
    "TON", "TUR", "TTO", "TUV", "TWN", "TZA",
/*  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",  */
    "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT",
/*  "WF",  "WS",  */
    "WLF", "WSM",
/*  "YE",  "YT",  */
    "YEM", "MYT",
/*  "ZA",  "ZM",  "ZW",  */
    "ZAF", "ZMB", "ZWE",
    NULL,   /* end of the list paired with the sorted COUNTRIES list */
/*  "FX",  "CS",  "RO",  "TP",  "YU",  "ZR",  */
    "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR",
    NULL    /* end of the list paired with the obsolete COUNTRIES list */
};
470
/** One row of the locale-ID canonicalization table. */
typedef struct CanonicalizationMap {
    /* input ID */
    const char *id;
    /* canonicalized output ID */
    const char *canonicalID;
    /* keyword, or NULL if none */
    const char *keyword;
    /* keyword value, or NULL if keyword is NULL */
    const char *value;
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 *
 * Columns: id, canonicalID, keyword, value.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* .NET name */
    { "",                   "en_US_POSIX",  NULL,        NULL          },
    /* POSIX name */
    { "C",                  "en_US_POSIX",  NULL,        NULL          },
    /* POSIX name (alias of C) */
    { "posix",              "en_US_POSIX",  NULL,        NULL          },
    /* registered name */
    { "art_LOJBAN",         "jbo",          NULL,        NULL          },
    /* .NET names */
    { "az_AZ_CYRL",         "az_Cyrl_AZ",   NULL,        NULL          },
    { "az_AZ_LATN",         "az_Latn_AZ",   NULL,        NULL          },
    { "ca_ES_PREEURO",      "ca_ES",        "currency",  "ESP"         },
    /* registered names */
    { "cel_GAULISH",        "cel__GAULISH", NULL,        NULL          },
    { "de_1901",            "de__1901",     NULL,        NULL          },
    { "de_1906",            "de__1906",     NULL,        NULL          },
    /* Old ICU name */
    { "de__PHONEBOOK",      "de",           "collation", "phonebook"   },
    { "de_AT_PREEURO",      "de_AT",        "currency",  "ATS"         },
    { "de_DE_PREEURO",      "de_DE",        "currency",  "DEM"         },
    { "de_LU_PREEURO",      "de_LU",        "currency",  "LUF"         },
    { "el_GR_PREEURO",      "el_GR",        "currency",  "GRD"         },
    /* registered names */
    { "en_BOONT",           "en__BOONT",    NULL,        NULL          },
    { "en_SCOUSE",          "en__SCOUSE",   NULL,        NULL          },
    { "en_BE_PREEURO",      "en_BE",        "currency",  "BEF"         },
    { "en_IE_PREEURO",      "en_IE",        "currency",  "IEP"         },
    /* Old ICU name */
    { "es__TRADITIONAL",    "es",           "collation", "traditional" },
    { "es_ES_PREEURO",      "es_ES",        "currency",  "ESP"         },
    { "eu_ES_PREEURO",      "eu_ES",        "currency",  "ESP"         },
    { "fi_FI_PREEURO",      "fi_FI",        "currency",  "FIM"         },
    { "fr_BE_PREEURO",      "fr_BE",        "currency",  "BEF"         },
    { "fr_FR_PREEURO",      "fr_FR",        "currency",  "FRF"         },
    { "fr_LU_PREEURO",      "fr_LU",        "currency",  "LUF"         },
    { "ga_IE_PREEURO",      "ga_IE",        "currency",  "IEP"         },
    { "gl_ES_PREEURO",      "gl_ES",        "currency",  "ESP"         },
    /* Old ICU name */
    { "hi__DIRECT",         "hi",           "collation", "direct"      },
    { "it_IT_PREEURO",      "it_IT",        "currency",  "ITL"         },
    /* Old ICU name */
    { "ja_JP_TRADITIONAL",  "ja_JP",        "calendar",  "japanese"    },
    /* "markus said this was ok" :-) */
    { "nb_NO_NY",           "nn_NO",        NULL,        NULL          },
    { "nl_BE_PREEURO",      "nl_BE",        "currency",  "BEF"         },
    { "nl_NL_PREEURO",      "nl_NL",        "currency",  "NLG"         },
    { "pt_PT_PREEURO",      "pt_PT",        "currency",  "PTE"         },
    /* registered name */
    { "sl_ROZAJ",           "sl__ROZAJ",    NULL,        NULL          },
    /* .NET names */
    { "sr_SP_CYRL",         "sr_Cyrl_RS",   NULL,        NULL          },
    { "sr_SP_LATN",         "sr_Latn_RS",   NULL,        NULL          },
    /* Linux name */
    { "sr_YU_CYRILLIC",     "sr_Cyrl_RS",   NULL,        NULL          },
    /* Old ICU name */
    { "th_TH_TRADITIONAL",  "th_TH",        "calendar",  "buddhist"    },
    /* Linux name */
    { "uz_UZ_CYRILLIC",     "uz_Cyrl_UZ",   NULL,        NULL          },
    /* .NET names */
    { "uz_UZ_CYRL",         "uz_Cyrl_UZ",   NULL,        NULL          },
    { "uz_UZ_LATN",         "uz_Latn_UZ",   NULL,        NULL          },
    { "zh_CHS",             "zh_Hans",      NULL,        NULL          },
    { "zh_CHT",             "zh_Hant",      NULL,        NULL          },
    /* registered names */
    { "zh_GAN",             "zh__GAN",      NULL,        NULL          },
    { "zh_GUOYU",           "zh",           NULL,        NULL          },
    { "zh_HAKKA",           "zh__HAKKA",    NULL,        NULL          },
    { "zh_MIN",             "zh__MIN",      NULL,        NULL          },
    { "zh_MIN_NAN",         "zh__MINNAN",   NULL,        NULL          },
    { "zh_WUU",             "zh__WUU",      NULL,        NULL          },
    { "zh_XIANG",           "zh__XIANG",    NULL,        NULL          },
    { "zh_YUE",             "zh__YUE",      NULL,        NULL          },
};
537
/** One row of the variant-to-keyword table. */
typedef struct VariantMap {
    /* input ID */
    const char *variant;
    /* keyword, or NULL if none */
    const char *keyword;
    /* keyword value, or NULL if keyword is NULL */
    const char *value;
} VariantMap;

/* Columns: variant, keyword, value. */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",
      "currency",  "EUR" },
    /* Solaris variants */
    { "PINYIN",
      "collation", "pinyin" },
    { "STROKE",
      "collation", "stroke" }
};
549
550/* ### Keywords **************************************************/
551
/* Size of the char buffer that holds one canonicalized keyword name, terminating NUL included. */
#define ULOC_KEYWORD_BUFFER_LEN  25
/* Number of entries in the on-stack keyword list used by _getKeywords(). */
#define ULOC_MAX_NO_KEYWORDS     25
554
/**
 * Locates the '@' that introduces the keyword section of a locale ID.
 *
 * @param localeID NUL-terminated locale ID to scan
 * @return pointer to the first '@' in localeID, or NULL if none is found.
 *         On EBCDIC builds a list of alternative '@' byte values is also
 *         tried, in table order, before giving up.
 */
static const char *
locale_getKeywordsStart(const char *localeID) {
    const char *keywordStart = uprv_strchr(localeID, '@');

    if (keywordStart != NULL) {
        return keywordStart;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* The @ sign is a variant character: the value compiled in on one
        EBCDIC machine is not necessarily the one used on another, so each
        known encoding of it is searched for in turn. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        int32_t signIndex;

        /* The table is terminated by a 0x00 entry. */
        for (signIndex = 0; ebcdicSigns[signIndex] != 0; ++signIndex) {
            keywordStart = uprv_strchr(localeID, ebcdicSigns[signIndex]);
            if (keywordStart != NULL) {
                return keywordStart;
            }
        }
    }
#endif
    return NULL;
}
578
579/**
580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
581 * @param keywordName incoming name to be canonicalized
582 * @param status return status (keyword too long)
583 * @return length of the keyword name
584 */
585static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
586{
587  int32_t i;
588  int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName);
589
590  if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) {
591    /* keyword name too long for internal buffer */
592    *status = U_INTERNAL_PROGRAM_ERROR;
593          return 0;
594  }
595
596  /* normalize the keyword name */
597  for(i = 0; i < keywordNameLen; i++) {
598    buf[i] = uprv_tolower(keywordName[i]);
599  }
600  buf[i] = 0;
601
602  return keywordNameLen;
603}
604
/* One keyword=value pair as collected by _getKeywords(). */
typedef struct {
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* lowercased, zero-terminated keyword name */
    int32_t keywordLen;                    /* length of keyword, terminator not counted */
    const char *valueStart;                /* first character of the value; the value is NOT terminated after valueLen characters */
    int32_t valueLen;                      /* number of characters of the value, starting at valueStart */
} KeywordStruct;
611
612static int32_t U_CALLCONV
613compareKeywordStructs(const void *context, const void *left, const void *right) {
614    const char* leftString = ((const KeywordStruct *)left)->keyword;
615    const char* rightString = ((const KeywordStruct *)right)->keyword;
616    return uprv_strcmp(leftString, rightString);
617}
618
619/**
620 * Both addKeyword and addValue must already be in canonical form.
621 * Either both addKeyword and addValue are NULL, or neither is NULL.
622 * If they are not NULL they must be zero terminated.
623 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
624 */
625static int32_t
626_getKeywords(const char *localeID,
627             char prev,
628             char *keywords, int32_t keywordCapacity,
629             char *values, int32_t valuesCapacity, int32_t *valLen,
630             UBool valuesToo,
631             const char* addKeyword,
632             const char* addValue,
633             UErrorCode *status)
634{
635    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
636
637    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
638    int32_t numKeywords = 0;
639    const char* pos = localeID;
640    const char* equalSign = NULL;
641    const char* semicolon = NULL;
642    int32_t i = 0, j, n;
643    int32_t keywordsLen = 0;
644    int32_t valuesLen = 0;
645
646    if(prev == '@') { /* start of keyword definition */
647        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
648        do {
649            UBool duplicate = FALSE;
650            /* skip leading spaces */
651            while(*pos == ' ') {
652                pos++;
653            }
654            if (!*pos) { /* handle trailing "; " */
655                break;
656            }
657            if(numKeywords == maxKeywords) {
658                *status = U_INTERNAL_PROGRAM_ERROR;
659                return 0;
660            }
661            equalSign = uprv_strchr(pos, '=');
662            semicolon = uprv_strchr(pos, ';');
663            /* lack of '=' [foo@currency] is illegal */
664            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
665            if(!equalSign || (semicolon && semicolon<equalSign)) {
666                *status = U_INVALID_FORMAT_ERROR;
667                return 0;
668            }
669            /* need to normalize both keyword and keyword name */
670            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
671                /* keyword name too long for internal buffer */
672                *status = U_INTERNAL_PROGRAM_ERROR;
673                return 0;
674            }
675            for(i = 0, n = 0; i < equalSign - pos; ++i) {
676                if (pos[i] != ' ') {
677                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
678                }
679            }
680            keywordList[numKeywords].keyword[n] = 0;
681            keywordList[numKeywords].keywordLen = n;
682            /* now grab the value part. First we skip the '=' */
683            equalSign++;
684            /* then we leading spaces */
685            while(*equalSign == ' ') {
686                equalSign++;
687            }
688            keywordList[numKeywords].valueStart = equalSign;
689
690            pos = semicolon;
691            i = 0;
692            if(pos) {
693                while(*(pos - i - 1) == ' ') {
694                    i++;
695                }
696                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
697                pos++;
698            } else {
699                i = (int32_t)uprv_strlen(equalSign);
700                while(equalSign[i-1] == ' ') {
701                    i--;
702                }
703                keywordList[numKeywords].valueLen = i;
704            }
705            /* If this is a duplicate keyword, then ignore it */
706            for (j=0; j<numKeywords; ++j) {
707                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
708                    duplicate = TRUE;
709                    break;
710                }
711            }
712            if (!duplicate) {
713                ++numKeywords;
714            }
715        } while(pos);
716
717        /* Handle addKeyword/addValue. */
718        if (addKeyword != NULL) {
719            UBool duplicate = FALSE;
720            U_ASSERT(addValue != NULL);
721            /* Search for duplicate; if found, do nothing. Explicit keyword
722               overrides addKeyword. */
723            for (j=0; j<numKeywords; ++j) {
724                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
725                    duplicate = TRUE;
726                    break;
727                }
728            }
729            if (!duplicate) {
730                if (numKeywords == maxKeywords) {
731                    *status = U_INTERNAL_PROGRAM_ERROR;
732                    return 0;
733                }
734                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
735                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
736                keywordList[numKeywords].valueStart = addValue;
737                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
738                ++numKeywords;
739            }
740        } else {
741            U_ASSERT(addValue == NULL);
742        }
743
744        /* now we have a list of keywords */
745        /* we need to sort it */
746        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
747
748        /* Now construct the keyword part */
749        for(i = 0; i < numKeywords; i++) {
750            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
751                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
752                if(valuesToo) {
753                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
754                } else {
755                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
756                }
757            }
758            keywordsLen += keywordList[i].keywordLen + 1;
759            if(valuesToo) {
760                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
761                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
762                }
763                keywordsLen += keywordList[i].valueLen;
764
765                if(i < numKeywords - 1) {
766                    if(keywordsLen < keywordCapacity) {
767                        keywords[keywordsLen] = ';';
768                    }
769                    keywordsLen++;
770                }
771            }
772            if(values) {
773                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
774                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
775                    values[valuesLen + keywordList[i].valueLen] = 0;
776                }
777                valuesLen += keywordList[i].valueLen + 1;
778            }
779        }
780        if(values) {
781            values[valuesLen] = 0;
782            if(valLen) {
783                *valLen = valuesLen;
784            }
785        }
786        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
787    } else {
788        return 0;
789    }
790}
791
792U_CFUNC int32_t
793locale_getKeywords(const char *localeID,
794                   char prev,
795                   char *keywords, int32_t keywordCapacity,
796                   char *values, int32_t valuesCapacity, int32_t *valLen,
797                   UBool valuesToo,
798                   UErrorCode *status) {
799    return _getKeywords(localeID, prev, keywords, keywordCapacity,
800                        values, valuesCapacity, valLen, valuesToo,
801                        NULL, NULL, status);
802}
803
804U_CAPI int32_t U_EXPORT2
805uloc_getKeywordValue(const char* localeID,
806                     const char* keywordName,
807                     char* buffer, int32_t bufferCapacity,
808                     UErrorCode* status)
809{
810    const char* nextSeparator = NULL;
811    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
812    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
813    int32_t i = 0;
814    int32_t result = 0;
815
816    if(status && U_SUCCESS(*status) && localeID) {
817
818      const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
819      if(startSearchHere == NULL) {
820          /* no keywords, return at once */
821          return 0;
822      }
823
824      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
825      if(U_FAILURE(*status)) {
826        return 0;
827      }
828
829      /* find the first keyword */
830      while(startSearchHere) {
831          startSearchHere++;
832          /* skip leading spaces (allowed?) */
833          while(*startSearchHere == ' ') {
834              startSearchHere++;
835          }
836          nextSeparator = uprv_strchr(startSearchHere, '=');
837          /* need to normalize both keyword and keyword name */
838          if(!nextSeparator) {
839              break;
840          }
841          if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
842              /* keyword name too long for internal buffer */
843              *status = U_INTERNAL_PROGRAM_ERROR;
844              return 0;
845          }
846          for(i = 0; i < nextSeparator - startSearchHere; i++) {
847              localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]);
848          }
849          /* trim trailing spaces */
850          while(startSearchHere[i-1] == ' ') {
851              i--;
852          }
853          localeKeywordNameBuffer[i] = 0;
854
855          startSearchHere = uprv_strchr(nextSeparator, ';');
856
857          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
858              nextSeparator++;
859              while(*nextSeparator == ' ') {
860                  nextSeparator++;
861              }
862              /* we actually found the keyword. Copy the value */
863              if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) {
864                  while(*(startSearchHere-1) == ' ') {
865                      startSearchHere--;
866                  }
867                  uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator);
868                  result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status);
869              } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */
870                  i = (int32_t)uprv_strlen(nextSeparator);
871                  while(nextSeparator[i - 1] == ' ') {
872                      i--;
873                  }
874                  uprv_strncpy(buffer, nextSeparator, i);
875                  result = u_terminateChars(buffer, bufferCapacity, i, status);
876              } else {
877                  /* give a bigger buffer, please */
878                  *status = U_BUFFER_OVERFLOW_ERROR;
879                  if(startSearchHere) {
880                      result = (int32_t)(startSearchHere - nextSeparator);
881                  } else {
882                      result = (int32_t)uprv_strlen(nextSeparator);
883                  }
884              }
885              return result;
886          }
887      }
888    }
889    return 0;
890}
891
892U_CAPI int32_t U_EXPORT2
893uloc_setKeywordValue(const char* keywordName,
894                     const char* keywordValue,
895                     char* buffer, int32_t bufferCapacity,
896                     UErrorCode* status)
897{
898    /* TODO: sorting. removal. */
899    int32_t keywordNameLen;
900    int32_t keywordValueLen;
901    int32_t bufLen;
902    int32_t needLen = 0;
903    int32_t foundValueLen;
904    int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */
905    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
906    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
907    int32_t i = 0;
908    int32_t rc;
909    char* nextSeparator = NULL;
910    char* nextEqualsign = NULL;
911    char* startSearchHere = NULL;
912    char* keywordStart = NULL;
913    char *insertHere = NULL;
914    if(U_FAILURE(*status)) {
915        return -1;
916    }
917    if(bufferCapacity>1) {
918        bufLen = (int32_t)uprv_strlen(buffer);
919    } else {
920        *status = U_ILLEGAL_ARGUMENT_ERROR;
921        return 0;
922    }
923    if(bufferCapacity<bufLen) {
924        /* The capacity is less than the length?! Is this NULL terminated? */
925        *status = U_ILLEGAL_ARGUMENT_ERROR;
926        return 0;
927    }
928    if(keywordValue && !*keywordValue) {
929        keywordValue = NULL;
930    }
931    if(keywordValue) {
932        keywordValueLen = (int32_t)uprv_strlen(keywordValue);
933    } else {
934        keywordValueLen = 0;
935    }
936    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
937    if(U_FAILURE(*status)) {
938        return 0;
939    }
940    startSearchHere = (char*)locale_getKeywordsStart(buffer);
941    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
942        if(!keywordValue) { /* no keywords = nothing to remove */
943            return bufLen;
944        }
945
946        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
947        if(startSearchHere) { /* had a single @ */
948            needLen--; /* already had the @ */
949            /* startSearchHere points at the @ */
950        } else {
951            startSearchHere=buffer+bufLen;
952        }
953        if(needLen >= bufferCapacity) {
954            *status = U_BUFFER_OVERFLOW_ERROR;
955            return needLen; /* no change */
956        }
957        *startSearchHere = '@';
958        startSearchHere++;
959        uprv_strcpy(startSearchHere, keywordNameBuffer);
960        startSearchHere += keywordNameLen;
961        *startSearchHere = '=';
962        startSearchHere++;
963        uprv_strcpy(startSearchHere, keywordValue);
964        startSearchHere+=keywordValueLen;
965        return needLen;
966    } /* end shortcut - no @ */
967
968    keywordStart = startSearchHere;
969    /* search for keyword */
970    while(keywordStart) {
971        keywordStart++;
972        /* skip leading spaces (allowed?) */
973        while(*keywordStart == ' ') {
974            keywordStart++;
975        }
976        nextEqualsign = uprv_strchr(keywordStart, '=');
977        /* need to normalize both keyword and keyword name */
978        if(!nextEqualsign) {
979            break;
980        }
981        if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) {
982            /* keyword name too long for internal buffer */
983            *status = U_INTERNAL_PROGRAM_ERROR;
984            return 0;
985        }
986        for(i = 0; i < nextEqualsign - keywordStart; i++) {
987            localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]);
988        }
989        /* trim trailing spaces */
990        while(keywordStart[i-1] == ' ') {
991            i--;
992        }
993        localeKeywordNameBuffer[i] = 0;
994
995        nextSeparator = uprv_strchr(nextEqualsign, ';');
996        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
997        if(rc == 0) {
998            nextEqualsign++;
999            while(*nextEqualsign == ' ') {
1000                nextEqualsign++;
1001            }
1002            /* we actually found the keyword. Change the value */
1003            if (nextSeparator) {
1004                keywordAtEnd = 0;
1005                foundValueLen = (int32_t)(nextSeparator - nextEqualsign);
1006            } else {
1007                keywordAtEnd = 1;
1008                foundValueLen = (int32_t)uprv_strlen(nextEqualsign);
1009            }
1010            if(keywordValue) { /* adding a value - not removing */
1011              if(foundValueLen == keywordValueLen) {
1012                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1013                return bufLen; /* no change in size */
1014              } else if(foundValueLen > keywordValueLen) {
1015                int32_t delta = foundValueLen - keywordValueLen;
1016                if(nextSeparator) { /* RH side */
1017                  uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer));
1018                }
1019                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1020                bufLen -= delta;
1021                buffer[bufLen]=0;
1022                return bufLen;
1023              } else { /* FVL < KVL */
1024                int32_t delta = keywordValueLen - foundValueLen;
1025                if((bufLen+delta) >= bufferCapacity) {
1026                  *status = U_BUFFER_OVERFLOW_ERROR;
1027                  return bufLen+delta;
1028                }
1029                if(nextSeparator) { /* RH side */
1030                  uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer));
1031                }
1032                uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen);
1033                bufLen += delta;
1034                buffer[bufLen]=0;
1035                return bufLen;
1036              }
1037            } else { /* removing a keyword */
1038              if(keywordAtEnd) {
1039                /* zero out the ';' or '@' just before startSearchhere */
1040                keywordStart[-1] = 0;
1041                return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */
1042              } else {
1043                uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer));
1044                keywordStart[bufLen-((nextSeparator+1)-buffer)]=0;
1045                return (int32_t)(bufLen-((nextSeparator+1)-keywordStart));
1046              }
1047            }
1048        } else if(rc<0){ /* end match keyword */
1049          /* could insert at this location. */
1050          insertHere = keywordStart;
1051        }
1052        keywordStart = nextSeparator;
1053    } /* end loop searching */
1054
1055    if(!keywordValue) {
1056      return bufLen; /* removal of non-extant keyword - no change */
1057    }
1058
1059    /* we know there is at least one keyword. */
1060    needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1061    if(needLen >= bufferCapacity) {
1062        *status = U_BUFFER_OVERFLOW_ERROR;
1063        return needLen; /* no change */
1064    }
1065
1066    if(insertHere) {
1067      uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer));
1068      keywordStart = insertHere;
1069    } else {
1070      keywordStart = buffer+bufLen;
1071      *keywordStart = ';';
1072      keywordStart++;
1073    }
1074    uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen);
1075    keywordStart += keywordNameLen;
1076    *keywordStart = '=';
1077    keywordStart++;
1078    uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */
1079    keywordStart+=keywordValueLen;
1080    if(insertHere) {
1081      *keywordStart = ';';
1082      keywordStart++;
1083    }
1084    buffer[needLen]=0;
1085    return needLen;
1086}
1087
1088/* ### ID parsing implementation **************************************************/
1089
/*
 * Character classification helpers for locale ID parsing.
 * Every argument is parenthesized in the expansion so that any expression
 * may be passed; note that an argument is still evaluated more than once,
 * so it must not have side effects.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')

/*returns TRUE if a is one of the letters that can start a special prefix*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1103
1104static char* _strnchr(const char* str, int32_t len, char c) {
1105    U_ASSERT(str != 0 && len >= 0);
1106    while (len-- != 0) {
1107        char d = *str;
1108        if (d == c) {
1109            return (char*) str;
1110        } else if (d == 0) {
1111            break;
1112        }
1113        ++str;
1114    }
1115    return NULL;
1116}
1117
1118/**
1119 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1120 * a NULL entry, followed by more entries, and a second NULL entry.
1121 *
1122 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1123 * COUNTRIES_3.
1124 */
1125static int16_t _findIndex(const char* const* list, const char* key)
1126{
1127    const char* const* anchor = list;
1128    int32_t pass = 0;
1129
1130    /* Make two passes through two NULL-terminated arrays at 'list' */
1131    while (pass++ < 2) {
1132        while (*list) {
1133            if (uprv_strcmp(key, *list) == 0) {
1134                return (int16_t)(list - anchor);
1135            }
1136            list++;
1137        }
1138        ++list;     /* skip final NULL *CWB*/
1139    }
1140    return -1;
1141}
1142
1143/* count the length of src while copying it to dest; return strlen(src) */
1144static U_INLINE int32_t
1145_copyCount(char *dest, int32_t destCapacity, const char *src) {
1146    const char *anchor;
1147    char c;
1148
1149    anchor=src;
1150    for(;;) {
1151        if((c=*src)==0) {
1152            return (int32_t)(src-anchor);
1153        }
1154        if(destCapacity<=0) {
1155            return (int32_t)((src-anchor)+uprv_strlen(src));
1156        }
1157        ++src;
1158        *dest++=c;
1159        --destCapacity;
1160    }
1161}
1162
1163static const char*
1164uloc_getCurrentCountryID(const char* oldID){
1165    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1166    if (offset >= 0) {
1167        return REPLACEMENT_COUNTRIES[offset];
1168    }
1169    return oldID;
1170}
1171static const char*
1172uloc_getCurrentLanguageID(const char* oldID){
1173    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1174    if (offset >= 0) {
1175        return REPLACEMENT_LANGUAGES[offset];
1176    }
1177    return oldID;
1178}
1179/*
1180 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1181 * avoid duplicating code to handle the earlier locale ID pieces
1182 * in the functions for the later ones by
1183 * setting the *pEnd pointer to where they stopped parsing
1184 *
1185 * TODO try to use this in Locale
1186 */
1187static int32_t
1188_getLanguage(const char *localeID,
1189             char *language, int32_t languageCapacity,
1190             const char **pEnd) {
1191    int32_t i=0;
1192    int32_t offset;
1193    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1194
1195    /* if it starts with i- or x- then copy that prefix */
1196    if(_isIDPrefix(localeID)) {
1197        if(i<languageCapacity) {
1198            language[i]=(char)uprv_tolower(*localeID);
1199        }
1200        if(i<languageCapacity) {
1201            language[i+1]='-';
1202        }
1203        i+=2;
1204        localeID+=2;
1205    }
1206
1207    /* copy the language as far as possible and count its length */
1208    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1209        if(i<languageCapacity) {
1210            language[i]=(char)uprv_tolower(*localeID);
1211        }
1212        if(i<3) {
1213            lang[i]=(char)uprv_tolower(*localeID);
1214        }
1215        i++;
1216        localeID++;
1217    }
1218
1219    if(i==3) {
1220        /* convert 3 character code to 2 character code if possible *CWB*/
1221        offset=_findIndex(LANGUAGES_3, lang);
1222        if(offset>=0) {
1223            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1224        }
1225    }
1226
1227    if(pEnd!=NULL) {
1228        *pEnd=localeID;
1229    }
1230    return i;
1231}
1232
1233static int32_t
1234_getScript(const char *localeID,
1235            char *script, int32_t scriptCapacity,
1236            const char **pEnd)
1237{
1238    int32_t idLen = 0;
1239
1240    if (pEnd != NULL) {
1241        *pEnd = localeID;
1242    }
1243
1244    /* copy the second item as far as possible and count its length */
1245    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1246        idLen++;
1247    }
1248
1249    /* If it's exactly 4 characters long, then it's a script and not a country. */
1250    if (idLen == 4) {
1251        int32_t i;
1252        if (pEnd != NULL) {
1253            *pEnd = localeID+idLen;
1254        }
1255        if(idLen > scriptCapacity) {
1256            idLen = scriptCapacity;
1257        }
1258        if (idLen >= 1) {
1259            script[0]=(char)uprv_toupper(*(localeID++));
1260        }
1261        for (i = 1; i < idLen; i++) {
1262            script[i]=(char)uprv_tolower(*(localeID++));
1263        }
1264    }
1265    else {
1266        idLen = 0;
1267    }
1268    return idLen;
1269}
1270
1271static int32_t
1272_getCountry(const char *localeID,
1273            char *country, int32_t countryCapacity,
1274            const char **pEnd)
1275{
1276    int32_t i=0;
1277    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1278    int32_t offset;
1279
1280    /* copy the country as far as possible and count its length */
1281    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1282        if(i<countryCapacity) {
1283            country[i]=(char)uprv_toupper(*localeID);
1284        }
1285        if(i<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1286            cnty[i]=(char)uprv_toupper(*localeID);
1287        }
1288        i++;
1289        localeID++;
1290    }
1291
1292    /* convert 3 character code to 2 character code if possible *CWB*/
1293    if(i==3) {
1294        offset=_findIndex(COUNTRIES_3, cnty);
1295        if(offset>=0) {
1296            i=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1297        }
1298    }
1299
1300    if(pEnd!=NULL) {
1301        *pEnd=localeID;
1302    }
1303    return i;
1304}
1305
1306/**
1307 * @param needSeparator if true, then add leading '_' if any variants
1308 * are added to 'variant'
1309 */
1310static int32_t
1311_getVariantEx(const char *localeID,
1312              char prev,
1313              char *variant, int32_t variantCapacity,
1314              UBool needSeparator) {
1315    int32_t i=0;
1316
1317    /* get one or more variant tags and separate them with '_' */
1318    if(_isIDSeparator(prev)) {
1319        /* get a variant string after a '-' or '_' */
1320        while(!_isTerminator(*localeID)) {
1321            if (needSeparator) {
1322                if (i<variantCapacity) {
1323                    variant[i] = '_';
1324                }
1325                ++i;
1326                needSeparator = FALSE;
1327            }
1328            if(i<variantCapacity) {
1329                variant[i]=(char)uprv_toupper(*localeID);
1330                if(variant[i]=='-') {
1331                    variant[i]='_';
1332                }
1333            }
1334            i++;
1335            localeID++;
1336        }
1337    }
1338
1339    /* if there is no variant tag after a '-' or '_' then look for '@' */
1340    if(i==0) {
1341        if(prev=='@') {
1342            /* keep localeID */
1343        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1344            ++localeID; /* point after the '@' */
1345        } else {
1346            return 0;
1347        }
1348        while(!_isTerminator(*localeID)) {
1349            if (needSeparator) {
1350                if (i<variantCapacity) {
1351                    variant[i] = '_';
1352                }
1353                ++i;
1354                needSeparator = FALSE;
1355            }
1356            if(i<variantCapacity) {
1357                variant[i]=(char)uprv_toupper(*localeID);
1358                if(variant[i]=='-' || variant[i]==',') {
1359                    variant[i]='_';
1360                }
1361            }
1362            i++;
1363            localeID++;
1364        }
1365    }
1366
1367    return i;
1368}
1369
1370static int32_t
1371_getVariant(const char *localeID,
1372            char prev,
1373            char *variant, int32_t variantCapacity) {
1374    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1375}
1376
1377/**
1378 * Delete ALL instances of a variant from the given list of one or
1379 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1380 * @param variants the source string of one or more variants,
1381 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1382 * terminated; if it is, trailing zero will NOT be maintained.
1383 * @param variantsLen length of variants
1384 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1385 * or "PREEURO"; not zero terminated
1386 * @param toDeleteLen length of toDelete
1387 * @return number of characters deleted from variants
1388 */
1389static int32_t
1390_deleteVariant(char* variants, int32_t variantsLen,
1391               const char* toDelete, int32_t toDeleteLen)
1392{
1393    int32_t delta = 0; /* number of chars deleted */
1394    for (;;) {
1395        UBool flag = FALSE;
1396        if (variantsLen < toDeleteLen) {
1397            return delta;
1398        }
1399        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1400            (variantsLen == toDeleteLen ||
1401             (flag=(variants[toDeleteLen] == '_'))))
1402        {
1403            int32_t d = toDeleteLen + (flag?1:0);
1404            variantsLen -= d;
1405            delta += d;
1406            if (variantsLen > 0) {
1407                uprv_memmove(variants, variants+d, variantsLen);
1408            }
1409        } else {
1410            char* p = _strnchr(variants, variantsLen, '_');
1411            if (p == NULL) {
1412                return delta;
1413            }
1414            ++p;
1415            variantsLen -= (int32_t)(p - variants);
1416            variants = p;
1417        }
1418    }
1419}
1420
1421/* Keyword enumeration */
1422
/* State behind a keyword UEnumeration; see uloc_openKeywordList(). */
typedef struct UKeywordsContext {
    char* keywords; /* heap copy of the keyword list: zero-terminated names back to back, ended by an empty string; freed on close */
    char* current;  /* position in keywords of the next name to hand out */
} UKeywordsContext;
1427
1428static void U_CALLCONV
1429uloc_kw_closeKeywords(UEnumeration *enumerator) {
1430    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1431    uprv_free(enumerator->context);
1432    uprv_free(enumerator);
1433}
1434
1435static int32_t U_CALLCONV
1436uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) {
1437    char *kw = ((UKeywordsContext *)en->context)->keywords;
1438    int32_t result = 0;
1439    while(*kw) {
1440        result++;
1441        kw += uprv_strlen(kw)+1;
1442    }
1443    return result;
1444}
1445
1446static const char* U_CALLCONV
1447uloc_kw_nextKeyword(UEnumeration* en,
1448                    int32_t* resultLength,
1449                    UErrorCode* status) {
1450    const char* result = ((UKeywordsContext *)en->context)->current;
1451    int32_t len = 0;
1452    if(*result) {
1453        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1454        ((UKeywordsContext *)en->context)->current += len+1;
1455    } else {
1456        result = NULL;
1457    }
1458    if (resultLength) {
1459        *resultLength = len;
1460    }
1461    return result;
1462}
1463
1464static void U_CALLCONV
1465uloc_kw_resetKeywords(UEnumeration* en,
1466                      UErrorCode* status) {
1467    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1468}
1469
/*
 * Template for keyword enumerations.  uloc_openKeywordList() copies it into
 * a freshly allocated UEnumeration and then sets that copy's context.
 * The initializers are positional; the two leading NULLs are presumably the
 * base context and context slots of UEnumeration -- confirm against its
 * declaration.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1479
1480U_CAPI UEnumeration* U_EXPORT2
1481uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1482{
1483    UKeywordsContext *myContext = NULL;
1484    UEnumeration *result = NULL;
1485
1486    if(U_FAILURE(*status)) {
1487        return NULL;
1488    }
1489    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1490    /* Null pointer test */
1491    if (result == NULL) {
1492        *status = U_MEMORY_ALLOCATION_ERROR;
1493        return NULL;
1494    }
1495    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1496    myContext = uprv_malloc(sizeof(UKeywordsContext));
1497    if (myContext == NULL) {
1498        *status = U_MEMORY_ALLOCATION_ERROR;
1499        uprv_free(result);
1500        return NULL;
1501    }
1502    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1503    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1504    myContext->keywords[keywordListSize] = 0;
1505    myContext->current = myContext->keywords;
1506    result->context = myContext;
1507    return result;
1508}
1509
1510U_CAPI UEnumeration* U_EXPORT2
1511uloc_openKeywords(const char* localeID,
1512                        UErrorCode* status)
1513{
1514    int32_t i=0;
1515    char keywords[256];
1516    int32_t keywordsCapacity = 256;
1517    if(status==NULL || U_FAILURE(*status)) {
1518        return 0;
1519    }
1520
1521    if(localeID==NULL) {
1522        localeID=uloc_getDefault();
1523    }
1524
1525    /* Skip the language */
1526    _getLanguage(localeID, NULL, 0, &localeID);
1527    if(_isIDSeparator(*localeID)) {
1528        const char *scriptID;
1529        /* Skip the script if available */
1530        _getScript(localeID+1, NULL, 0, &scriptID);
1531        if(scriptID != localeID+1) {
1532            /* Found optional script */
1533            localeID = scriptID;
1534        }
1535        /* Skip the Country */
1536        if (_isIDSeparator(*localeID)) {
1537            _getCountry(localeID+1, NULL, 0, &localeID);
1538            if(_isIDSeparator(*localeID)) {
1539                _getVariant(localeID+1, *localeID, NULL, 0);
1540            }
1541        }
1542    }
1543
1544    /* keywords are located after '@' */
1545    if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
1546        i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1547    }
1548
1549    if(i) {
1550        return uloc_openKeywordList(keywords, i, status);
1551    } else {
1552        return NULL;
1553    }
1554}
1555
1556
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * Non-zero when at least one bit of mask is set in options.
 * Both arguments are parenthesized so that compound expressions
 * (for example a mask written as A|B) expand as intended.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The characters of "i-default"; deliberately not NUL-terminated, use I_DEFAULT_LENGTH */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0])
1565
1566/**
1567 * Canonicalize the given localeID, to level 1 or to level 2,
1568 * depending on the options.  To specify level 1, pass in options=0.
1569 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1570 *
1571 * This is the code underlying uloc_getName and uloc_canonicalize.
1572 */
1573static int32_t
1574_canonicalize(const char* localeID,
1575              char* result,
1576              int32_t resultCapacity,
1577              uint32_t options,
1578              UErrorCode* err) {
1579    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1580    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1581    const char* origLocaleID;
1582    const char* keywordAssign = NULL;
1583    const char* separatorIndicator = NULL;
1584    const char* addKeyword = NULL;
1585    const char* addValue = NULL;
1586    char* name;
1587    char* variant = NULL; /* pointer into name, or NULL */
1588
1589    if (U_FAILURE(*err)) {
1590        return 0;
1591    }
1592
1593    if (localeID==NULL) {
1594        localeID=uloc_getDefault();
1595    }
1596    origLocaleID=localeID;
1597
1598    /* if we are doing a full canonicalization, then put results in
1599       localeBuffer, if necessary; otherwise send them to result. */
1600    if (OPTION_SET(options, _ULOC_CANONICALIZE) &&
1601        (result == NULL || resultCapacity <  sizeof(localeBuffer))) {
1602        name = localeBuffer;
1603        nameCapacity = sizeof(localeBuffer);
1604    } else {
1605        name = result;
1606        nameCapacity = resultCapacity;
1607    }
1608
1609    /* get all pieces, one after another, and separate with '_' */
1610    len=_getLanguage(localeID, name, nameCapacity, &localeID);
1611
1612    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1613        const char *d = uloc_getDefault();
1614
1615        len = uprv_strlen(d);
1616
1617        if (name != NULL) {
1618            uprv_strncpy(name, d, len);
1619        }
1620    } else if(_isIDSeparator(*localeID)) {
1621        const char *scriptID;
1622
1623        ++fieldCount;
1624        if(len<nameCapacity) {
1625            name[len]='_';
1626        }
1627        ++len;
1628
1629        scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
1630        if(scriptSize > 0) {
1631            /* Found optional script */
1632            localeID = scriptID;
1633            ++fieldCount;
1634            len+=scriptSize;
1635            if (_isIDSeparator(*localeID)) {
1636                /* If there is something else, then we add the _ */
1637                if(len<nameCapacity) {
1638                    name[len]='_';
1639                }
1640                ++len;
1641            }
1642        }
1643
1644        if (_isIDSeparator(*localeID)) {
1645            len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID);
1646            if(_isIDSeparator(*localeID)) {
1647                ++fieldCount;
1648                if(len<nameCapacity) {
1649                    name[len]='_';
1650                }
1651                ++len;
1652                variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
1653                if (variantSize > 0) {
1654                    variant = name+len;
1655                    len += variantSize;
1656                    localeID += variantSize + 1; /* skip '_' and variant */
1657                }
1658            }
1659        }
1660    }
1661
1662    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1663    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
1664        UBool done = FALSE;
1665        do {
1666            char c = *localeID;
1667            switch (c) {
1668            case 0:
1669            case '@':
1670                done = TRUE;
1671                break;
1672            default:
1673                if (len<nameCapacity) {
1674                    name[len] = c;
1675                }
1676                ++len;
1677                ++localeID;
1678                break;
1679            }
1680        } while (!done);
1681    }
1682
1683    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1684       After this, localeID either points to '@' or is NULL */
1685    if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1686        keywordAssign = uprv_strchr(localeID, '=');
1687        separatorIndicator = uprv_strchr(localeID, ';');
1688    }
1689
1690    /* Copy POSIX-style variant, if any [mr@FOO] */
1691    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1692        localeID != NULL && keywordAssign == NULL) {
1693        for (;;) {
1694            char c = *localeID;
1695            if (c == 0) {
1696                break;
1697            }
1698            if (len<nameCapacity) {
1699                name[len] = c;
1700            }
1701            ++len;
1702            ++localeID;
1703        }
1704    }
1705
1706    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1707        /* Handle @FOO variant if @ is present and not followed by = */
1708        if (localeID!=NULL && keywordAssign==NULL) {
1709            int32_t posixVariantSize;
1710            /* Add missing '_' if needed */
1711            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1712                do {
1713                    if(len<nameCapacity) {
1714                        name[len]='_';
1715                    }
1716                    ++len;
1717                    ++fieldCount;
1718                } while(fieldCount<2);
1719            }
1720            posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
1721                                             (UBool)(variantSize > 0));
1722            if (posixVariantSize > 0) {
1723                if (variant == NULL) {
1724                    variant = name+len;
1725                }
1726                len += posixVariantSize;
1727                variantSize += posixVariantSize;
1728            }
1729        }
1730
1731        /* Handle generic variants first */
1732        if (variant) {
1733            for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) {
1734                const char* variantToCompare = VARIANT_MAP[j].variant;
1735                int32_t n = (int32_t)uprv_strlen(variantToCompare);
1736                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1737                len -= variantLen;
1738                if (variantLen > 0) {
1739                    if (name[len-1] == '_') { /* delete trailing '_' */
1740                        --len;
1741                    }
1742                    addKeyword = VARIANT_MAP[j].keyword;
1743                    addValue = VARIANT_MAP[j].value;
1744                    break;
1745                }
1746            }
1747            if (name[len-1] == '_') { /* delete trailing '_' */
1748                --len;
1749            }
1750        }
1751
1752        /* Look up the ID in the canonicalization map */
1753        for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
1754            const char* id = CANONICALIZE_MAP[j].id;
1755            int32_t n = (int32_t)uprv_strlen(id);
1756            if (len == n && uprv_strncmp(name, id, n) == 0) {
1757                if (n == 0 && localeID != NULL) {
1758                    break; /* Don't remap "" if keywords present */
1759                }
1760                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1761                if (CANONICALIZE_MAP[j].keyword) {
1762                    addKeyword = CANONICALIZE_MAP[j].keyword;
1763                    addValue = CANONICALIZE_MAP[j].value;
1764                }
1765                break;
1766            }
1767        }
1768    }
1769
1770    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1771        if (localeID!=NULL && keywordAssign!=NULL &&
1772            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1773            if(len<nameCapacity) {
1774                name[len]='@';
1775            }
1776            ++len;
1777            ++fieldCount;
1778            len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
1779                                addKeyword, addValue, err);
1780        } else if (addKeyword != NULL) {
1781            U_ASSERT(addValue != NULL);
1782            /* inelegant but works -- later make _getKeywords do this? */
1783            len += _copyCount(name+len, nameCapacity-len, "@");
1784            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1785            len += _copyCount(name+len, nameCapacity-len, "=");
1786            len += _copyCount(name+len, nameCapacity-len, addValue);
1787        }
1788    }
1789
1790    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1791        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1792    }
1793
1794    return u_terminateChars(result, resultCapacity, len, err);
1795}
1796
1797/* ### ID parsing API **************************************************/
1798
1799U_CAPI int32_t  U_EXPORT2
1800uloc_getParent(const char*    localeID,
1801               char* parent,
1802               int32_t parentCapacity,
1803               UErrorCode* err)
1804{
1805    const char *lastUnderscore;
1806    int32_t i;
1807
1808    if (U_FAILURE(*err))
1809        return 0;
1810
1811    if (localeID == NULL)
1812        localeID = uloc_getDefault();
1813
1814    lastUnderscore=uprv_strrchr(localeID, '_');
1815    if(lastUnderscore!=NULL) {
1816        i=(int32_t)(lastUnderscore-localeID);
1817    } else {
1818        i=0;
1819    }
1820
1821    if(i>0 && parent != localeID) {
1822        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1823    }
1824    return u_terminateChars(parent, parentCapacity, i, err);
1825}
1826
1827U_CAPI int32_t U_EXPORT2
1828uloc_getLanguage(const char*    localeID,
1829         char* language,
1830         int32_t languageCapacity,
1831         UErrorCode* err)
1832{
1833    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1834    int32_t i=0;
1835
1836    if (err==NULL || U_FAILURE(*err)) {
1837        return 0;
1838    }
1839
1840    if(localeID==NULL) {
1841        localeID=uloc_getDefault();
1842    }
1843
1844    i=_getLanguage(localeID, language, languageCapacity, NULL);
1845    return u_terminateChars(language, languageCapacity, i, err);
1846}
1847
1848U_CAPI int32_t U_EXPORT2
1849uloc_getScript(const char*    localeID,
1850         char* script,
1851         int32_t scriptCapacity,
1852         UErrorCode* err)
1853{
1854    int32_t i=0;
1855
1856    if(err==NULL || U_FAILURE(*err)) {
1857        return 0;
1858    }
1859
1860    if(localeID==NULL) {
1861        localeID=uloc_getDefault();
1862    }
1863
1864    /* skip the language */
1865    _getLanguage(localeID, NULL, 0, &localeID);
1866    if(_isIDSeparator(*localeID)) {
1867        i=_getScript(localeID+1, script, scriptCapacity, NULL);
1868    }
1869    return u_terminateChars(script, scriptCapacity, i, err);
1870}
1871
1872U_CAPI int32_t  U_EXPORT2
1873uloc_getCountry(const char* localeID,
1874            char* country,
1875            int32_t countryCapacity,
1876            UErrorCode* err)
1877{
1878    int32_t i=0;
1879
1880    if(err==NULL || U_FAILURE(*err)) {
1881        return 0;
1882    }
1883
1884    if(localeID==NULL) {
1885        localeID=uloc_getDefault();
1886    }
1887
1888    /* Skip the language */
1889    _getLanguage(localeID, NULL, 0, &localeID);
1890    if(_isIDSeparator(*localeID)) {
1891        const char *scriptID;
1892        /* Skip the script if available */
1893        _getScript(localeID+1, NULL, 0, &scriptID);
1894        if(scriptID != localeID+1) {
1895            /* Found optional script */
1896            localeID = scriptID;
1897        }
1898        if(_isIDSeparator(*localeID)) {
1899            i=_getCountry(localeID+1, country, countryCapacity, NULL);
1900        }
1901    }
1902    return u_terminateChars(country, countryCapacity, i, err);
1903}
1904
1905U_CAPI int32_t  U_EXPORT2
1906uloc_getVariant(const char* localeID,
1907                char* variant,
1908                int32_t variantCapacity,
1909                UErrorCode* err)
1910{
1911    int32_t i=0;
1912
1913    if(err==NULL || U_FAILURE(*err)) {
1914        return 0;
1915    }
1916
1917    if(localeID==NULL) {
1918        localeID=uloc_getDefault();
1919    }
1920
1921    /* Skip the language */
1922    _getLanguage(localeID, NULL, 0, &localeID);
1923    if(_isIDSeparator(*localeID)) {
1924        const char *scriptID;
1925        /* Skip the script if available */
1926        _getScript(localeID+1, NULL, 0, &scriptID);
1927        if(scriptID != localeID+1) {
1928            /* Found optional script */
1929            localeID = scriptID;
1930        }
1931        /* Skip the Country */
1932        if (_isIDSeparator(*localeID)) {
1933            _getCountry(localeID+1, NULL, 0, &localeID);
1934            if(_isIDSeparator(*localeID)) {
1935                i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
1936            }
1937        }
1938    }
1939
1940    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
1941    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
1942/*
1943    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1944        i=_getVariant(localeID+1, '@', variant, variantCapacity);
1945    }
1946*/
1947    return u_terminateChars(variant, variantCapacity, i, err);
1948}
1949
1950U_CAPI int32_t  U_EXPORT2
1951uloc_getName(const char* localeID,
1952             char* name,
1953             int32_t nameCapacity,
1954             UErrorCode* err)
1955{
1956    return _canonicalize(localeID, name, nameCapacity, 0, err);
1957}
1958
1959U_CAPI int32_t  U_EXPORT2
1960uloc_getBaseName(const char* localeID,
1961                 char* name,
1962                 int32_t nameCapacity,
1963                 UErrorCode* err)
1964{
1965    return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1966}
1967
1968U_CAPI int32_t  U_EXPORT2
1969uloc_canonicalize(const char* localeID,
1970                  char* name,
1971                  int32_t nameCapacity,
1972                  UErrorCode* err)
1973{
1974    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1975}
1976
1977U_CAPI const char*  U_EXPORT2
1978uloc_getISO3Language(const char* localeID)
1979{
1980    int16_t offset;
1981    char lang[ULOC_LANG_CAPACITY];
1982    UErrorCode err = U_ZERO_ERROR;
1983
1984    if (localeID == NULL)
1985    {
1986        localeID = uloc_getDefault();
1987    }
1988    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1989    if (U_FAILURE(err))
1990        return "";
1991    offset = _findIndex(LANGUAGES, lang);
1992    if (offset < 0)
1993        return "";
1994    return LANGUAGES_3[offset];
1995}
1996
1997U_CAPI const char*  U_EXPORT2
1998uloc_getISO3Country(const char* localeID)
1999{
2000    int16_t offset;
2001    char cntry[ULOC_LANG_CAPACITY];
2002    UErrorCode err = U_ZERO_ERROR;
2003
2004    if (localeID == NULL)
2005    {
2006        localeID = uloc_getDefault();
2007    }
2008    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2009    if (U_FAILURE(err))
2010        return "";
2011    offset = _findIndex(COUNTRIES, cntry);
2012    if (offset < 0)
2013        return "";
2014
2015    return COUNTRIES_3[offset];
2016}
2017
2018U_CAPI uint32_t  U_EXPORT2
2019uloc_getLCID(const char* localeID)
2020{
2021    UErrorCode status = U_ZERO_ERROR;
2022    char       langID[ULOC_FULLNAME_CAPACITY];
2023
2024    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2025    if (U_FAILURE(status)) {
2026        return 0;
2027    }
2028
2029    return uprv_convertToLCID(langID, localeID, &status);
2030}
2031
2032U_CAPI int32_t U_EXPORT2
2033uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2034                UErrorCode *status)
2035{
2036    int32_t length;
2037    const char *posix = uprv_convertToPosix(hostid, status);
2038    if (U_FAILURE(*status) || posix == NULL) {
2039        return 0;
2040    }
2041    length = (int32_t)uprv_strlen(posix);
2042    if (length+1 > localeCapacity) {
2043        *status = U_BUFFER_OVERFLOW_ERROR;
2044    }
2045    else {
2046        uprv_strcpy(locale, posix);
2047    }
2048    return length;
2049}
2050
2051/* ### Default locale **************************************************/
2052
2053U_CAPI const char*  U_EXPORT2
2054uloc_getDefault()
2055{
2056    return locale_get_default();
2057}
2058
2059U_CAPI void  U_EXPORT2
2060uloc_setDefault(const char*   newDefaultLocale,
2061             UErrorCode* err)
2062{
2063    if (U_FAILURE(*err))
2064        return;
2065    /* the error code isn't currently used for anything by this function*/
2066
2067    /* propagate change to C++ */
2068    locale_set_default(newDefaultLocale);
2069}
2070
2071/* ### Display name **************************************************/
2072
2073/*
2074 * Lookup a resource bundle table item with fallback on the table level.
2075 * Regular resource bundle lookups perform fallback to parent locale bundles
2076 * and eventually the root bundle, but only for top-level items.
2077 * This function takes the name of a top-level table and of an item in that table
2078 * and performs a lookup of both, falling back until a bundle contains a table
2079 * with this item.
2080 *
2081 * Note: Only the opening of entire bundles falls back through the default locale
2082 * before root. Once a bundle is open, item lookups do not go through the
2083 * default locale because that would result in a mix of languages that is
2084 * unpredictable to the programmer and most likely useless.
2085 */
2086static const UChar *
2087_res_getTableStringWithFallback(const char *path, const char *locale,
2088                              const char *tableKey, const char *subTableKey,
2089                              const char *itemKey,
2090                              int32_t *pLength,
2091                              UErrorCode *pErrorCode)
2092{
2093/*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
2094    UResourceBundle *rb=NULL, table, subTable;
2095    const UChar *item=NULL;
2096    UErrorCode errorCode;
2097    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
2098
2099    /*
2100     * open the bundle for the current locale
2101     * this falls back through the locale's chain to root
2102     */
2103    errorCode=U_ZERO_ERROR;
2104    rb=ures_open(path, locale, &errorCode);
2105    if(U_FAILURE(errorCode)) {
2106        /* total failure, not even root could be opened */
2107        *pErrorCode=errorCode;
2108        return NULL;
2109    } else if(errorCode==U_USING_DEFAULT_WARNING ||
2110                (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING)
2111    ) {
2112        /* set the "strongest" error code (success->fallback->default->failure) */
2113        *pErrorCode=errorCode;
2114    }
2115
2116    for(;;){
2117        ures_initStackObject(&table);
2118        ures_initStackObject(&subTable);
2119        ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode);
2120        if (subTableKey != NULL) {
2121            /*
2122            ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode);
2123            item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode);
2124            if(U_FAILURE(errorCode)){
2125                *pErrorCode = errorCode;
2126            }
2127
2128            break;*/
2129
2130            ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode);
2131        }
2132        if(U_SUCCESS(errorCode)){
2133            item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode);
2134            if(U_FAILURE(errorCode)){
2135                const char* replacement = NULL;
2136                *pErrorCode = errorCode; /*save the errorCode*/
2137                errorCode = U_ZERO_ERROR;
2138                /* may be a deprecated code */
2139                if(uprv_strcmp(tableKey, "Countries")==0){
2140                    replacement =  uloc_getCurrentCountryID(itemKey);
2141                }else if(uprv_strcmp(tableKey, "Languages")==0){
2142                    replacement =  uloc_getCurrentLanguageID(itemKey);
2143                }
2144                /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/
2145                if(replacement!=NULL && itemKey != replacement){
2146                    item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode);
2147                    if(U_SUCCESS(errorCode)){
2148                        *pErrorCode = errorCode;
2149                        break;
2150                    }
2151                }
2152            }else{
2153                break;
2154            }
2155        }
2156
2157        if(U_FAILURE(errorCode)){
2158
2159            /* still can't figure out ?.. try the fallback mechanism */
2160            int32_t len = 0;
2161            const UChar* fallbackLocale =  NULL;
2162            *pErrorCode = errorCode;
2163            errorCode = U_ZERO_ERROR;
2164
2165            fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode);
2166            if(U_FAILURE(errorCode)){
2167               *pErrorCode = errorCode;
2168                break;
2169            }
2170
2171            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
2172
2173            /* guard against recursive fallback */
2174            if(uprv_strcmp(explicitFallbackName, locale)==0){
2175                *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
2176                break;
2177            }
2178            ures_close(rb);
2179            rb = ures_open(NULL, explicitFallbackName, &errorCode);
2180            if(U_FAILURE(errorCode)){
2181                *pErrorCode = errorCode;
2182                break;
2183            }
2184            /* succeeded in opening the fallback bundle .. continue and try to fetch the item */
2185        }else{
2186            break;
2187        }
2188    }
2189    /* done with the locale string - ready to close table and rb */
2190    ures_close(&subTable);
2191    ures_close(&table);
2192    ures_close(rb);
2193    return item;
2194}
2195
2196static int32_t
2197_getStringOrCopyKey(const char *path, const char *locale,
2198                    const char *tableKey,
2199                    const char* subTableKey,
2200                    const char *itemKey,
2201                    const char *substitute,
2202                    UChar *dest, int32_t destCapacity,
2203                    UErrorCode *pErrorCode) {
2204    const UChar *s = NULL;
2205    int32_t length = 0;
2206
2207    if(itemKey==NULL) {
2208        /* top-level item: normal resource bundle access */
2209        UResourceBundle *rb;
2210
2211        rb=ures_open(path, locale, pErrorCode);
2212        if(U_SUCCESS(*pErrorCode)) {
2213            s=ures_getStringByKey(rb, tableKey, &length, pErrorCode);
2214            /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
2215            ures_close(rb);
2216        }
2217    } else {
2218        /* Language code should not be a number. If it is, set the error code. */
2219        if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
2220            *pErrorCode = U_MISSING_RESOURCE_ERROR;
2221        } else {
2222            /* second-level item, use special fallback */
2223            s=_res_getTableStringWithFallback(path, locale,
2224                                               tableKey,
2225                                               subTableKey,
2226                                               itemKey,
2227                                               &length,
2228                                               pErrorCode);
2229        }
2230    }
2231    if(U_SUCCESS(*pErrorCode)) {
2232        int32_t copyLength=uprv_min(length, destCapacity);
2233        if(copyLength>0 && s != NULL) {
2234            u_memcpy(dest, s, copyLength);
2235        }
2236    } else {
2237        /* no string from a resource bundle: convert the substitute */
2238        length=(int32_t)uprv_strlen(substitute);
2239        u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
2240        *pErrorCode=U_USING_DEFAULT_WARNING;
2241    }
2242
2243    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2244}
2245
2246static int32_t
2247_getDisplayNameForComponent(const char *locale,
2248                            const char *displayLocale,
2249                            UChar *dest, int32_t destCapacity,
2250                            int32_t (*getter)(const char *, char *, int32_t, UErrorCode *),
2251                            const char *tag,
2252                            UErrorCode *pErrorCode) {
2253    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
2254    int32_t length;
2255    UErrorCode localStatus;
2256
2257    /* argument checking */
2258    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2259        return 0;
2260    }
2261
2262    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2263        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2264        return 0;
2265    }
2266
2267    localStatus = U_ZERO_ERROR;
2268    length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
2269    if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
2270        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2271        return 0;
2272    }
2273    if(length==0) {
2274        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
2275    }
2276
2277    return _getStringOrCopyKey(NULL, displayLocale,
2278                               tag, NULL, localeBuffer,
2279                               localeBuffer,
2280                               dest, destCapacity,
2281                               pErrorCode);
2282}
2283
2284U_CAPI int32_t U_EXPORT2
2285uloc_getDisplayLanguage(const char *locale,
2286                        const char *displayLocale,
2287                        UChar *dest, int32_t destCapacity,
2288                        UErrorCode *pErrorCode) {
2289    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2290                uloc_getLanguage, _kLanguages, pErrorCode);
2291}
2292
2293U_CAPI int32_t U_EXPORT2
2294uloc_getDisplayScript(const char* locale,
2295                      const char* displayLocale,
2296                      UChar *dest, int32_t destCapacity,
2297                      UErrorCode *pErrorCode)
2298{
2299    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2300                uloc_getScript, _kScripts, pErrorCode);
2301}
2302
2303U_CAPI int32_t U_EXPORT2
2304uloc_getDisplayCountry(const char *locale,
2305                       const char *displayLocale,
2306                       UChar *dest, int32_t destCapacity,
2307                       UErrorCode *pErrorCode) {
2308    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2309                uloc_getCountry, _kCountries, pErrorCode);
2310}
2311
2312/*
2313 * TODO separate variant1_variant2_variant3...
2314 * by getting each tag's display string and concatenating them with ", "
2315 * in between - similar to uloc_getDisplayName()
2316 */
2317U_CAPI int32_t U_EXPORT2
2318uloc_getDisplayVariant(const char *locale,
2319                       const char *displayLocale,
2320                       UChar *dest, int32_t destCapacity,
2321                       UErrorCode *pErrorCode) {
2322    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
2323                uloc_getVariant, _kVariants, pErrorCode);
2324}
2325
2326U_CAPI int32_t U_EXPORT2
2327uloc_getDisplayName(const char *locale,
2328                    const char *displayLocale,
2329                    UChar *dest, int32_t destCapacity,
2330                    UErrorCode *pErrorCode)
2331{
2332    int32_t length, length2, length3 = 0;
2333    UBool hasLanguage, hasScript, hasCountry, hasVariant, hasKeywords;
2334    UEnumeration* keywordEnum = NULL;
2335    int32_t keywordCount = 0;
2336    const char *keyword = NULL;
2337    int32_t keywordLen = 0;
2338    char keywordValue[256];
2339    int32_t keywordValueLen = 0;
2340
2341    /* argument checking */
2342    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2343        return 0;
2344    }
2345
2346    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2347        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2348        return 0;
2349    }
2350
2351    /*
2352     * if there is a language, then write "language (country, variant)"
2353     * otherwise write "country, variant"
2354     */
2355
2356    /* write the language */
2357    length=uloc_getDisplayLanguage(locale, displayLocale,
2358                                   dest, destCapacity,
2359                                   pErrorCode);
2360    hasLanguage= length>0;
2361
2362    if(hasLanguage) {
2363        /* append " (" */
2364        if(length<destCapacity) {
2365            dest[length]=0x20;
2366        }
2367        ++length;
2368        if(length<destCapacity) {
2369            dest[length]=0x28;
2370        }
2371        ++length;
2372    }
2373
2374    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2375        /* keep preflighting */
2376        *pErrorCode=U_ZERO_ERROR;
2377    }
2378
2379    /* append the script */
2380    if(length<destCapacity) {
2381        length2=uloc_getDisplayScript(locale, displayLocale,
2382                                       dest+length, destCapacity-length,
2383                                       pErrorCode);
2384    } else {
2385        length2=uloc_getDisplayScript(locale, displayLocale,
2386                                       NULL, 0,
2387                                       pErrorCode);
2388    }
2389    hasScript= length2>0;
2390    length+=length2;
2391
2392    if(hasScript) {
2393        /* append ", " */
2394        if(length<destCapacity) {
2395            dest[length]=0x2c;
2396        }
2397        ++length;
2398        if(length<destCapacity) {
2399            dest[length]=0x20;
2400        }
2401        ++length;
2402    }
2403
2404    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2405        /* keep preflighting */
2406        *pErrorCode=U_ZERO_ERROR;
2407    }
2408
2409    /* append the country */
2410    if(length<destCapacity) {
2411        length2=uloc_getDisplayCountry(locale, displayLocale,
2412                                       dest+length, destCapacity-length,
2413                                       pErrorCode);
2414    } else {
2415        length2=uloc_getDisplayCountry(locale, displayLocale,
2416                                       NULL, 0,
2417                                       pErrorCode);
2418    }
2419    hasCountry= length2>0;
2420    length+=length2;
2421
2422    if(hasCountry) {
2423        /* append ", " */
2424        if(length<destCapacity) {
2425            dest[length]=0x2c;
2426        }
2427        ++length;
2428        if(length<destCapacity) {
2429            dest[length]=0x20;
2430        }
2431        ++length;
2432    }
2433
2434    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2435        /* keep preflighting */
2436        *pErrorCode=U_ZERO_ERROR;
2437    }
2438
2439    /* append the variant */
2440    if(length<destCapacity) {
2441        length2=uloc_getDisplayVariant(locale, displayLocale,
2442                                       dest+length, destCapacity-length,
2443                                       pErrorCode);
2444    } else {
2445        length2=uloc_getDisplayVariant(locale, displayLocale,
2446                                       NULL, 0,
2447                                       pErrorCode);
2448    }
2449    hasVariant= length2>0;
2450    length+=length2;
2451
2452    if(hasVariant) {
2453        /* append ", " */
2454        if(length<destCapacity) {
2455            dest[length]=0x2c;
2456        }
2457        ++length;
2458        if(length<destCapacity) {
2459            dest[length]=0x20;
2460        }
2461        ++length;
2462    }
2463
2464    keywordEnum = uloc_openKeywords(locale, pErrorCode);
2465
2466    for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){
2467          if(U_FAILURE(*pErrorCode)){
2468              break;
2469          }
2470          /* the uenum_next returns NUL terminated string */
2471          keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode);
2472          if(length + length3 < destCapacity) {
2473            length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2474          } else {
2475            length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode);
2476          }
2477          if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2478              /* keep preflighting */
2479              *pErrorCode=U_ZERO_ERROR;
2480          }
2481          keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode);
2482          if(keywordValueLen) {
2483            if(length + length3 < destCapacity) {
2484              dest[length + length3] = 0x3D;
2485            }
2486            length3++;
2487            if(length + length3 < destCapacity) {
2488              length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode);
2489            } else {
2490              length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode);
2491            }
2492            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2493                /* keep preflighting */
2494                *pErrorCode=U_ZERO_ERROR;
2495            }
2496          }
2497          if(keywordCount > 1) {
2498            if(length + length3 + 1 < destCapacity && keywordCount) {
2499              dest[length + length3]=0x2c;
2500              dest[length + length3+1]=0x20;
2501            }
2502            length3++; /* ',' */
2503            length3++; /* ' ' */
2504          }
2505    }
2506    uenum_close(keywordEnum);
2507
2508    hasKeywords = length3 > 0;
2509    length += length3;
2510
2511
2512
2513    if ((hasScript && !hasCountry)
2514        || ((hasScript || hasCountry) && !hasVariant && !hasKeywords)
2515        || ((hasScript || hasCountry || hasVariant) && !hasKeywords)
2516        || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords))
2517    {
2518        /* remove ", " or " (" */
2519        length-=2;
2520    }
2521
2522    if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) {
2523        /* append ")" */
2524        if(length<destCapacity) {
2525            dest[length]=0x29;
2526        }
2527        ++length;
2528    }
2529
2530    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2531        /* keep preflighting */
2532        *pErrorCode=U_ZERO_ERROR;
2533    }
2534
2535    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
2536}
2537
2538U_CAPI int32_t U_EXPORT2
2539uloc_getDisplayKeyword(const char* keyword,
2540                       const char* displayLocale,
2541                       UChar* dest,
2542                       int32_t destCapacity,
2543                       UErrorCode* status){
2544
2545    /* argument checking */
2546    if(status==NULL || U_FAILURE(*status)) {
2547        return 0;
2548    }
2549
2550    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2551        *status=U_ILLEGAL_ARGUMENT_ERROR;
2552        return 0;
2553    }
2554
2555
2556    /* pass itemKey=NULL to look for a top-level item */
2557    return _getStringOrCopyKey(NULL, displayLocale,
2558                               _kKeys, NULL,
2559                               keyword,
2560                               keyword,
2561                               dest, destCapacity,
2562                               status);
2563
2564}
2565
2566
2567#define UCURRENCY_DISPLAY_NAME_INDEX 1
2568
2569U_CAPI int32_t U_EXPORT2
2570uloc_getDisplayKeywordValue(   const char* locale,
2571                               const char* keyword,
2572                               const char* displayLocale,
2573                               UChar* dest,
2574                               int32_t destCapacity,
2575                               UErrorCode* status){
2576
2577
2578    char keywordValue[ULOC_FULLNAME_CAPACITY*4];
2579    int32_t capacity = ULOC_FULLNAME_CAPACITY*4;
2580    int32_t keywordValueLen =0;
2581
2582    /* argument checking */
2583    if(status==NULL || U_FAILURE(*status)) {
2584        return 0;
2585    }
2586
2587    if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
2588        *status=U_ILLEGAL_ARGUMENT_ERROR;
2589        return 0;
2590    }
2591
2592    /* get the keyword value */
2593    keywordValue[0]=0;
2594    keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status);
2595
2596    /*
2597     * if the keyword is equal to currency .. then to get the display name
2598     * we need to do the fallback ourselves
2599     */
2600    if(uprv_stricmp(keyword, _kCurrency)==0){
2601
2602        int32_t dispNameLen = 0;
2603        const UChar *dispName = NULL;
2604
2605        UResourceBundle *bundle     = ures_open(NULL, displayLocale, status);
2606        UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status);
2607        UResourceBundle *currency   = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status);
2608
2609        dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status);
2610
2611        /*close the bundles */
2612        ures_close(currency);
2613        ures_close(currencies);
2614        ures_close(bundle);
2615
2616        if(U_FAILURE(*status)){
2617            if(*status == U_MISSING_RESOURCE_ERROR){
2618                /* we just want to write the value over if nothing is available */
2619                *status = U_USING_DEFAULT_WARNING;
2620            }else{
2621                return 0;
2622            }
2623        }
2624
2625        /* now copy the dispName over if not NULL */
2626        if(dispName != NULL){
2627            if(dispNameLen <= destCapacity){
2628                uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR);
2629                return u_terminateUChars(dest, destCapacity, dispNameLen, status);
2630            }else{
2631                *status = U_BUFFER_OVERFLOW_ERROR;
2632                return dispNameLen;
2633            }
2634        }else{
2635            /* we have not found the display name for the value .. just copy over */
2636            if(keywordValueLen <= destCapacity){
2637                u_charsToUChars(keywordValue, dest, keywordValueLen);
2638                return u_terminateUChars(dest, destCapacity, keywordValueLen, status);
2639            }else{
2640                 *status = U_BUFFER_OVERFLOW_ERROR;
2641                return keywordValueLen;
2642            }
2643        }
2644
2645
2646    }else{
2647
2648        return _getStringOrCopyKey(NULL, displayLocale,
2649                                   _kTypes, keyword,
2650                                   keywordValue,
2651                                   keywordValue,
2652                                   dest, destCapacity,
2653                                   status);
2654    }
2655}
2656
2657/* ### Get available **************************************************/
2658
2659static UBool U_CALLCONV uloc_cleanup(void) {
2660    char ** temp;
2661
2662    if (_installedLocales) {
2663        temp = _installedLocales;
2664        _installedLocales = NULL;
2665
2666        _installedLocalesCount = 0;
2667
2668        uprv_free(temp);
2669    }
2670    return TRUE;
2671}
2672
2673static void _load_installedLocales()
2674{
2675    UBool   localesLoaded;
2676
2677    UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded);
2678
2679    if (localesLoaded == FALSE) {
2680        UResourceBundle *indexLocale = NULL;
2681        UResourceBundle installed;
2682        UErrorCode status = U_ZERO_ERROR;
2683        char ** temp;
2684        int32_t i = 0;
2685        int32_t localeCount;
2686
2687        ures_initStackObject(&installed);
2688        indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status);
2689        ures_getByKey(indexLocale, _kIndexTag, &installed, &status);
2690
2691        if(U_SUCCESS(status)) {
2692            localeCount = ures_getSize(&installed);
2693            temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1));
2694            /* Check for null pointer */
2695            if (temp != NULL) {
2696                ures_resetIterator(&installed);
2697                while(ures_hasNext(&installed)) {
2698                    ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status);
2699                }
2700                temp[i] = NULL;
2701
2702                umtx_lock(NULL);
2703                if (_installedLocales == NULL)
2704                {
2705                    _installedLocalesCount = localeCount;
2706                    _installedLocales = temp;
2707                    temp = NULL;
2708                    ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
2709                }
2710                umtx_unlock(NULL);
2711
2712                uprv_free(temp);
2713            }
2714        }
2715        ures_close(&installed);
2716        ures_close(indexLocale);
2717    }
2718}
2719
2720U_CAPI const char* U_EXPORT2
2721uloc_getAvailable(int32_t offset)
2722{
2723
2724    _load_installedLocales();
2725
2726    if (offset > _installedLocalesCount)
2727        return NULL;
2728    return _installedLocales[offset];
2729}
2730
2731U_CAPI int32_t  U_EXPORT2
2732uloc_countAvailable()
2733{
2734    _load_installedLocales();
2735    return _installedLocalesCount;
2736}
2737
2738/**
2739 * Returns a list of all language codes defined in ISO 639.  This is a pointer
2740 * to an array of pointers to arrays of char.  All of these pointers are owned
2741 * by ICU-- do not delete them, and do not write through them.  The array is
2742 * terminated with a null pointer.
2743 */
2744U_CAPI const char* const*  U_EXPORT2
2745uloc_getISOLanguages()
2746{
2747    return LANGUAGES;
2748}
2749
2750/**
2751 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2752 * pointer to an array of pointers to arrays of char.  All of these pointers are
2753 * owned by ICU-- do not delete them, and do not write through them.  The array is
2754 * terminated with a null pointer.
2755 */
2756U_CAPI const char* const*  U_EXPORT2
2757uloc_getISOCountries()
2758{
2759    return COUNTRIES;
2760}
2761
2762
/* this function to be moved into cstring.c later */

/* decimal separator used by the C library; 0 until it has been detected */
static char gDecimal = 0;

/*
 * strtod() replacement that always accepts '.' as the decimal separator.
 *
 * The C library's separator is detected once by formatting 1.0. If it is
 * '.', the call is passed straight to uprv_strtod(). Otherwise up to 29
 * characters of the input are copied, the first '.' in the copy is replaced
 * with the library's separator, and the copy is parsed instead; input with
 * no '.' in that range is parsed directly.
 *
 * @param start  text to parse
 * @param end    if not NULL, receives the position in `start` where parsing
 *               stopped
 * @return the parsed value
 */
static double
_uloc_strtod(const char *start, char **end) {
    char localCopy[30];
    char *dot;
    char *parsedEnd;
    double value;

    if (gDecimal == 0) {
        char sample[5];
        /* For machines that decide to change the decimal on you,
        and try to be too smart with localization.
        This normally should be just a '.'. */
        sprintf(sample, "%+1.1f", 1.0);
        gDecimal = sample[2];
    }

    if(gDecimal == '.') {
        return uprv_strtod(start, end); /* fall through to OS */
    }

    uprv_strncpy(localCopy, start, 29);
    localCopy[29]=0;
    dot = uprv_strchr(localCopy, '.');
    if(dot == NULL) {
        return uprv_strtod(start, end); /* no decimal point */
    }

    *dot = gDecimal;
    value = uprv_strtod(localCopy, &parsedEnd);
    if(end != NULL) {
        /* map the stop position in the copy back onto the caller's string;
           cast away const (to follow uprv_strtod API.) */
        *end = (char*)(start+(parsedEnd-localCopy));
    }
    return value;
}
2801
/*
 * One entry parsed from an HTTP Accept-Language header by
 * uloc_acceptLanguageFromHTTP(), which sorts an array of these with
 * uloc_acceptLanguageCompare().
 */
typedef struct {
    float q;        /* quality value: parsed from the item's parameter, or 1.0 if it has none */
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
    char *locale;   /* heap-allocated locale ID of the item */
} _acceptLangItem;
2807
2808static int32_t U_CALLCONV
2809uloc_acceptLanguageCompare(const void *context, const void *a, const void *b)
2810{
2811    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2812    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2813
2814    int32_t rc = 0;
2815    if(bb->q < aa->q) {
2816        rc = -1;  /* A > B */
2817    } else if(bb->q > aa->q) {
2818        rc = 1;   /* A < B */
2819    } else {
2820        rc = 0;   /* A = B */
2821    }
2822
2823    if(rc==0) {
2824        rc = uprv_stricmp(aa->locale, bb->locale);
2825    }
2826
2827#if defined(ULOC_DEBUG)
2828    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2829    aa->locale, aa->q,
2830    bb->locale, bb->q,
2831    rc);*/
2832#endif
2833
2834    return rc;
2835}
2836
2837static ULayoutType
2838_uloc_getOrientationHelper(const char* localeId,
2839                           const char* key,
2840                           UErrorCode *status)
2841{
2842    ULayoutType result = ULOC_LAYOUT_UNKNOWN;
2843
2844    if (!U_FAILURE(*status)) {
2845        int32_t length = 0;
2846        char localeBuffer[ULOC_FULLNAME_CAPACITY];
2847
2848        uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
2849
2850        if (!U_FAILURE(*status)) {
2851            const UChar* const value =
2852                _res_getTableStringWithFallback(
2853                    NULL,
2854                    localeBuffer,
2855                    "layout",
2856                    NULL,
2857                    key,
2858                    &length,
2859                    status);
2860
2861            if (!U_FAILURE(*status) && length != 0) {
2862                switch(value[0])
2863                {
2864                case 0x0062: /* 'b' */
2865                    result = ULOC_LAYOUT_BTT;
2866                    break;
2867                case 0x006C: /* 'l' */
2868                    result = ULOC_LAYOUT_LTR;
2869                    break;
2870                case 0x0072: /* 'r' */
2871                    result = ULOC_LAYOUT_RTL;
2872                    break;
2873                case 0x0074: /* 't' */
2874                    result = ULOC_LAYOUT_TTB;
2875                    break;
2876                default:
2877                    *status = U_INTERNAL_PROGRAM_ERROR;
2878                    break;
2879                }
2880            }
2881        }
2882    }
2883
2884    return result;
2885}
2886
2887U_DRAFT ULayoutType U_EXPORT2
2888uloc_getCharacterOrientation(const char* localeId,
2889                             UErrorCode *status)
2890{
2891    return _uloc_getOrientationHelper(localeId, "characters", status);
2892}
2893
2894/**
2895 * Get the layout line orientation for the specified locale.
2896 *
2897 * @param localeID locale name
2898 * @param status Error status
2899 * @return an enum indicating the layout orientation for lines.
2900 * @stable ICU 4.0
2901 */
2902U_DRAFT ULayoutType U_EXPORT2
2903uloc_getLineOrientation(const char* localeId,
2904                        UErrorCode *status)
2905{
2906    return _uloc_getOrientationHelper(localeId, "lines", status);
2907}
2908
2909/*
2910mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2911*/
2912
2913U_CAPI int32_t U_EXPORT2
2914uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2915                            const char *httpAcceptLanguage,
2916                            UEnumeration* availableLocales,
2917                            UErrorCode *status)
2918{
2919    _acceptLangItem *j;
2920    _acceptLangItem smallBuffer[30];
2921    char **strs;
2922    char tmp[ULOC_FULLNAME_CAPACITY +1];
2923    int32_t n = 0;
2924    const char *itemEnd;
2925    const char *paramEnd;
2926    const char *s;
2927    const char *t;
2928    int32_t res;
2929    int32_t i;
2930    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2931    int32_t jSize;
2932    char *tempstr; /* Use for null pointer check */
2933
2934    j = smallBuffer;
2935    jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]);
2936    if(U_FAILURE(*status)) {
2937        return -1;
2938    }
2939
2940    for(s=httpAcceptLanguage;s&&*s;) {
2941        while(isspace(*s)) /* eat space at the beginning */
2942            s++;
2943        itemEnd=uprv_strchr(s,',');
2944        paramEnd=uprv_strchr(s,';');
2945        if(!itemEnd) {
2946            itemEnd = httpAcceptLanguage+l; /* end of string */
2947        }
2948        if(paramEnd && paramEnd<itemEnd) {
2949            /* semicolon (;) is closer than end (,) */
2950            t = paramEnd+1;
2951            if(*t=='q') {
2952                t++;
2953            }
2954            while(isspace(*t)) {
2955                t++;
2956            }
2957            if(*t=='=') {
2958                t++;
2959            }
2960            while(isspace(*t)) {
2961                t++;
2962            }
2963            j[n].q = (float)_uloc_strtod(t,NULL);
2964        } else {
2965            /* no semicolon - it's 1.0 */
2966            j[n].q = 1.0f;
2967            paramEnd = itemEnd;
2968        }
2969        j[n].dummy=0;
2970        /* eat spaces prior to semi */
2971        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2972            ;
2973        /* Check for null pointer from uprv_strndup */
2974        tempstr = uprv_strndup(s,(int32_t)((t+1)-s));
2975        if (tempstr == NULL) {
2976            *status = U_MEMORY_ALLOCATION_ERROR;
2977            return -1;
2978        }
2979        j[n].locale = tempstr;
2980        uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status);
2981        if(strcmp(j[n].locale,tmp)) {
2982            uprv_free(j[n].locale);
2983            j[n].locale=uprv_strdup(tmp);
2984        }
2985#if defined(ULOC_DEBUG)
2986        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2987#endif
2988        n++;
2989        s = itemEnd;
2990        while(*s==',') { /* eat duplicate commas */
2991            s++;
2992        }
2993        if(n>=jSize) {
2994            if(j==smallBuffer) {  /* overflowed the small buffer. */
2995                j = uprv_malloc(sizeof(j[0])*(jSize*2));
2996                if(j!=NULL) {
2997                    uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize);
2998                }
2999#if defined(ULOC_DEBUG)
3000                fprintf(stderr,"malloced at size %d\n", jSize);
3001#endif
3002            } else {
3003                j = uprv_realloc(j, sizeof(j[0])*jSize*2);
3004#if defined(ULOC_DEBUG)
3005                fprintf(stderr,"re-alloced at size %d\n", jSize);
3006#endif
3007            }
3008            jSize *= 2;
3009            if(j==NULL) {
3010                *status = U_MEMORY_ALLOCATION_ERROR;
3011                return -1;
3012            }
3013        }
3014    }
3015    uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
3016    if(U_FAILURE(*status)) {
3017        if(j != smallBuffer) {
3018#if defined(ULOC_DEBUG)
3019            fprintf(stderr,"freeing j %p\n", j);
3020#endif
3021            uprv_free(j);
3022        }
3023        return -1;
3024    }
3025    strs = uprv_malloc((size_t)(sizeof(strs[0])*n));
3026    /* Check for null pointer */
3027    if (strs == NULL) {
3028        uprv_free(j); /* Free to avoid memory leak */
3029        *status = U_MEMORY_ALLOCATION_ERROR;
3030        return -1;
3031    }
3032    for(i=0;i<n;i++) {
3033#if defined(ULOC_DEBUG)
3034        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
3035#endif
3036        strs[i]=j[i].locale;
3037    }
3038    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
3039        (const char**)strs, n, availableLocales, status);
3040    for(i=0;i<n;i++) {
3041        uprv_free(strs[i]);
3042    }
3043    uprv_free(strs);
3044    if(j != smallBuffer) {
3045#if defined(ULOC_DEBUG)
3046        fprintf(stderr,"freeing j %p\n", j);
3047#endif
3048        uprv_free(j);
3049    }
3050    return res;
3051}
3052
3053
3054U_CAPI int32_t U_EXPORT2
3055uloc_acceptLanguage(char *result, int32_t resultAvailable,
3056                    UAcceptResult *outResult, const char **acceptList,
3057                    int32_t acceptListCount,
3058                    UEnumeration* availableLocales,
3059                    UErrorCode *status)
3060{
3061    int32_t i,j;
3062    int32_t len;
3063    int32_t maxLen=0;
3064    char tmp[ULOC_FULLNAME_CAPACITY+1];
3065    const char *l;
3066    char **fallbackList;
3067    if(U_FAILURE(*status)) {
3068        return -1;
3069    }
3070    fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount));
3071    if(fallbackList==NULL) {
3072        *status = U_MEMORY_ALLOCATION_ERROR;
3073        return -1;
3074    }
3075    for(i=0;i<acceptListCount;i++) {
3076#if defined(ULOC_DEBUG)
3077        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
3078#endif
3079        while((l=uenum_next(availableLocales, NULL, status))) {
3080#if defined(ULOC_DEBUG)
3081            fprintf(stderr,"  %s\n", l);
3082#endif
3083            len = (int32_t)uprv_strlen(l);
3084            if(!uprv_strcmp(acceptList[i], l)) {
3085                if(outResult) {
3086                    *outResult = ULOC_ACCEPT_VALID;
3087                }
3088#if defined(ULOC_DEBUG)
3089                fprintf(stderr, "MATCH! %s\n", l);
3090#endif
3091                if(len>0) {
3092                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3093                }
3094                for(j=0;j<i;j++) {
3095                    uprv_free(fallbackList[j]);
3096                }
3097                uprv_free(fallbackList);
3098                return u_terminateChars(result, resultAvailable, len, status);
3099            }
3100            if(len>maxLen) {
3101                maxLen = len;
3102            }
3103        }
3104        uenum_reset(availableLocales, status);
3105        /* save off parent info */
3106        if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3107            fallbackList[i] = uprv_strdup(tmp);
3108        } else {
3109            fallbackList[i]=0;
3110        }
3111    }
3112
3113    for(maxLen--;maxLen>0;maxLen--) {
3114        for(i=0;i<acceptListCount;i++) {
3115            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
3116#if defined(ULOC_DEBUG)
3117                fprintf(stderr,"Try: [%s]", fallbackList[i]);
3118#endif
3119                while((l=uenum_next(availableLocales, NULL, status))) {
3120#if defined(ULOC_DEBUG)
3121                    fprintf(stderr,"  %s\n", l);
3122#endif
3123                    len = (int32_t)uprv_strlen(l);
3124                    if(!uprv_strcmp(fallbackList[i], l)) {
3125                        if(outResult) {
3126                            *outResult = ULOC_ACCEPT_FALLBACK;
3127                        }
3128#if defined(ULOC_DEBUG)
3129                        fprintf(stderr, "fallback MATCH! %s\n", l);
3130#endif
3131                        if(len>0) {
3132                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
3133                        }
3134                        for(j=0;j<acceptListCount;j++) {
3135                            uprv_free(fallbackList[j]);
3136                        }
3137                        uprv_free(fallbackList);
3138                        return u_terminateChars(result, resultAvailable, len, status);
3139                    }
3140                }
3141                uenum_reset(availableLocales, status);
3142
3143                if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) {
3144                    uprv_free(fallbackList[i]);
3145                    fallbackList[i] = uprv_strdup(tmp);
3146                } else {
3147                    uprv_free(fallbackList[i]);
3148                    fallbackList[i]=0;
3149                }
3150            }
3151        }
3152        if(outResult) {
3153            *outResult = ULOC_ACCEPT_FAILED;
3154        }
3155    }
3156    for(i=0;i<acceptListCount;i++) {
3157        uprv_free(fallbackList[i]);
3158    }
3159    uprv_free(fallbackList);
3160    return -1;
3161}
3162
3163
3164/**
3165 * This function looks for the localeID in the likelySubtags resource.
3166 *
3167 * @param localeID The tag to find.
3168 * @param buffer A buffer to hold the matching entry
3169 * @param bufferLength The length of the output buffer
3170 * @return A pointer to "buffer" if found, or a null pointer if not.
3171 */
3172static const char*  U_CALLCONV
3173findLikelySubtags(const char* localeID,
3174                  char* buffer,
3175                  int32_t bufferLength,
3176                  UErrorCode* err) {
3177    const char* result = NULL;
3178
3179    if (!U_FAILURE(*err)) {
3180        int32_t resLen = 0;
3181        const UChar* s = NULL;
3182        UErrorCode tmpErr = U_ZERO_ERROR;
3183        UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
3184        if (U_SUCCESS(tmpErr)) {
3185            s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
3186
3187            if (U_FAILURE(tmpErr)) {
3188                /*
3189                 * If a resource is missing, it's not really an error, it's
3190                 * just that we don't have any data for that particular locale ID.
3191                 */
3192                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
3193                    *err = tmpErr;
3194                }
3195            }
3196            else if (resLen >= bufferLength) {
3197                /* The buffer should never overflow. */
3198                *err = U_INTERNAL_PROGRAM_ERROR;
3199            }
3200            else {
3201                u_UCharsToChars(s, buffer, resLen + 1);
3202                result = buffer;
3203            }
3204
3205            ures_close(subtags);
3206        } else {
3207            *err = tmpErr;
3208        }
3209    }
3210
3211    return result;
3212}
3213
3214/**
3215 * Append a tag to a buffer, adding the separator if necessary.  The buffer
3216 * must be large enough to contain the resulting tag plus any separator
3217 * necessary. The tag must not be a zero-length string.
3218 *
3219 * @param tag The tag to add.
3220 * @param tagLength The length of the tag.
3221 * @param buffer The output buffer.
3222 * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
3223 **/
3224static void U_CALLCONV
3225appendTag(
3226    const char* tag,
3227    int32_t tagLength,
3228    char* buffer,
3229    int32_t* bufferLength) {
3230
3231    if (*bufferLength > 0) {
3232        buffer[*bufferLength] = '_';
3233        ++(*bufferLength);
3234    }
3235
3236    uprv_memmove(
3237        &buffer[*bufferLength],
3238        tag,
3239        tagLength);
3240
3241    *bufferLength += tagLength;
3242}
3243
/**
 * These are the canonical strings for unknown languages, scripts and regions.
 * NOTE(review): presumably substituted when building a tag that lacks the
 * corresponding subtag - confirm against the callers.
 **/
static const char* const unknownLanguage = "und";  /* unknown language */
static const char* const unknownScript = "Zzzz";   /* unknown script */
static const char* const unknownRegion = "ZZ";     /* unknown region */
3250
3251/**
3252 * Create a tag string from the supplied parameters.  The lang, script and region
3253 * parameters may be NULL pointers. If they are, their corresponding length parameters
3254 * must be less than or equal to 0.
3255 *
3256 * If any of the language, script or region parameters are empty, and the alternateTags
3257 * parameter is not NULL, it will be parsed for potential language, script and region tags
3258 * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
3259 * it contains no language tag, the default tag for the unknown language is used.
3260 *
3261 * If the length of the new string exceeds the capacity of the output buffer,
3262 * the function copies as many bytes to the output buffer as it can, and returns
3263 * the error U_BUFFER_OVERFLOW_ERROR.
3264 *
3265 * If an illegal argument is provided, the function returns the error
3266 * U_ILLEGAL_ARGUMENT_ERROR.
3267 *
3268 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
3269 * the tag string fits in the output buffer, but the null terminator doesn't.
3270 *
3271 * @param lang The language tag to use.
3272 * @param langLength The length of the language tag.
3273 * @param script The script tag to use.
3274 * @param scriptLength The length of the script tag.
3275 * @param region The region tag to use.
3276 * @param regionLength The length of the region tag.
3277 * @param trailing Any trailing data to append to the new tag.
3278 * @param trailingLength The length of the trailing data.
3279 * @param alternateTags A string containing any alternate tags.
3280 * @param tag The output buffer.
3281 * @param tagCapacity The capacity of the output buffer.
3282 * @param err A pointer to a UErrorCode for error reporting.
3283 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
3284 **/
3285static int32_t U_CALLCONV
3286createTagStringWithAlternates(
3287    const char* lang,
3288    int32_t langLength,
3289    const char* script,
3290    int32_t scriptLength,
3291    const char* region,
3292    int32_t regionLength,
3293    const char* trailing,
3294    int32_t trailingLength,
3295    const char* alternateTags,
3296    char* tag,
3297    int32_t tagCapacity,
3298    UErrorCode* err) {
3299
3300    if (U_FAILURE(*err)) {
3301        goto error;
3302    }
3303    else if (tag == NULL ||
3304             tagCapacity <= 0 ||
3305             langLength >= ULOC_LANG_CAPACITY ||
3306             scriptLength >= ULOC_SCRIPT_CAPACITY ||
3307             regionLength >= ULOC_COUNTRY_CAPACITY) {
3308        goto error;
3309    }
3310    else {
3311        /**
3312         * ULOC_FULLNAME_CAPACITY will provide enough capacity
3313         * that we can build a string that contains the language,
3314         * script and region code without worrying about overrunning
3315         * the user-supplied buffer.
3316         **/
3317        char tagBuffer[ULOC_FULLNAME_CAPACITY];
3318        int32_t tagLength = 0;
3319        int32_t capacityRemaining = tagCapacity;
3320        UBool regionAppended = FALSE;
3321
3322        if (langLength > 0) {
3323            appendTag(
3324                lang,
3325                langLength,
3326                tagBuffer,
3327                &tagLength);
3328        }
3329        else if (alternateTags == NULL) {
3330            /*
3331             * Append the value for an unknown language, if
3332             * we found no language.
3333             */
3334            appendTag(
3335                unknownLanguage,
3336                uprv_strlen(unknownLanguage),
3337                tagBuffer,
3338                &tagLength);
3339        }
3340        else {
3341            /*
3342             * Parse the alternateTags string for the language.
3343             */
3344            char alternateLang[ULOC_LANG_CAPACITY];
3345            int32_t alternateLangLength = sizeof(alternateLang);
3346
3347            alternateLangLength =
3348                uloc_getLanguage(
3349                    alternateTags,
3350                    alternateLang,
3351                    alternateLangLength,
3352                    err);
3353            if(U_FAILURE(*err) ||
3354                alternateLangLength >= ULOC_LANG_CAPACITY) {
3355                goto error;
3356            }
3357            else if (alternateLangLength == 0) {
3358                /*
3359                 * Append the value for an unknown language, if
3360                 * we found no language.
3361                 */
3362                appendTag(
3363                    unknownLanguage,
3364                    uprv_strlen(unknownLanguage),
3365                    tagBuffer,
3366                    &tagLength);
3367            }
3368            else {
3369                appendTag(
3370                    alternateLang,
3371                    alternateLangLength,
3372                    tagBuffer,
3373                    &tagLength);
3374            }
3375        }
3376
3377        if (scriptLength > 0) {
3378            appendTag(
3379                script,
3380                scriptLength,
3381                tagBuffer,
3382                &tagLength);
3383        }
3384        else if (alternateTags != NULL) {
3385            /*
3386             * Parse the alternateTags string for the script.
3387             */
3388            char alternateScript[ULOC_SCRIPT_CAPACITY];
3389
3390            const int32_t alternateScriptLength =
3391                uloc_getScript(
3392                    alternateTags,
3393                    alternateScript,
3394                    sizeof(alternateScript),
3395                    err);
3396
3397            if (U_FAILURE(*err) ||
3398                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
3399                goto error;
3400            }
3401            else if (alternateScriptLength > 0) {
3402                appendTag(
3403                    alternateScript,
3404                    alternateScriptLength,
3405                    tagBuffer,
3406                    &tagLength);
3407            }
3408        }
3409
3410        if (regionLength > 0) {
3411            appendTag(
3412                region,
3413                regionLength,
3414                tagBuffer,
3415                &tagLength);
3416
3417            regionAppended = TRUE;
3418        }
3419        else if (alternateTags != NULL) {
3420            /*
3421             * Parse the alternateTags string for the region.
3422             */
3423            char alternateRegion[ULOC_COUNTRY_CAPACITY];
3424
3425            const int32_t alternateRegionLength =
3426                uloc_getCountry(
3427                    alternateTags,
3428                    alternateRegion,
3429                    sizeof(alternateRegion),
3430                    err);
3431            if (U_FAILURE(*err) ||
3432                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
3433                goto error;
3434            }
3435            else if (alternateRegionLength > 0) {
3436                appendTag(
3437                    alternateRegion,
3438                    alternateRegionLength,
3439                    tagBuffer,
3440                    &tagLength);
3441
3442                regionAppended = TRUE;
3443            }
3444        }
3445
3446        {
3447            const int32_t toCopy =
3448                tagLength >= tagCapacity ? tagCapacity : tagLength;
3449
3450            /**
3451             * Copy the partial tag from our internal buffer to the supplied
3452             * target.
3453             **/
3454            uprv_memcpy(
3455                tag,
3456                tagBuffer,
3457                toCopy);
3458
3459            capacityRemaining -= toCopy;
3460        }
3461
3462        if (trailingLength > 0) {
3463            if (capacityRemaining > 0 && !regionAppended) {
3464                tag[tagLength++] = '_';
3465                --capacityRemaining;
3466            }
3467
3468            if (capacityRemaining > 0) {
3469                /*
3470                 * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
3471                 * don't know if the user-supplied buffers overlap.
3472                 */
3473                const int32_t toCopy =
3474                    trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
3475
3476                uprv_memmove(
3477                    &tag[tagLength],
3478                    trailing,
3479                    toCopy);
3480            }
3481        }
3482
3483        tagLength += trailingLength;
3484
3485        return u_terminateChars(
3486                    tag,
3487                    tagCapacity,
3488                    tagLength,
3489                    err);
3490    }
3491
3492error:
3493
3494    /**
3495     * An overflow indicates the locale ID passed in
3496     * is ill-formed.  If we got here, and there was
3497     * no previous error, it's an implicit overflow.
3498     **/
3499    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
3500        U_SUCCESS(*err)) {
3501        *err = U_ILLEGAL_ARGUMENT_ERROR;
3502    }
3503
3504    return -1;
3505}
3506
3507/**
3508 * Create a tag string from the supplied parameters.  The lang, script and region
3509 * parameters may be NULL pointers. If they are, their corresponding length parameters
3510 * must be less than or equal to 0.  If the lang parameter is an empty string, the
3511 * default value for an unknown language is written to the output buffer.
3512 *
3513 * If the length of the new string exceeds the capacity of the output buffer,
3514 * the function copies as many bytes to the output buffer as it can, and returns
3515 * the error U_BUFFER_OVERFLOW_ERROR.
3516 *
3517 * If an illegal argument is provided, the function returns the error
3518 * U_ILLEGAL_ARGUMENT_ERROR.
3519 *
3520 * @param lang The language tag to use.
3521 * @param langLength The length of the language tag.
3522 * @param script The script tag to use.
3523 * @param scriptLength The length of the script tag.
3524 * @param region The region tag to use.
3525 * @param regionLength The length of the region tag.
3526 * @param trailing Any trailing data to append to the new tag.
3527 * @param trailingLength The length of the trailing data.
3528 * @param tag The output buffer.
3529 * @param tagCapacity The capacity of the output buffer.
3530 * @param err A pointer to a UErrorCode for error reporting.
3531 * @return The length of the tag string, which may be greater than tagCapacity.
3532 **/
3533static int32_t U_CALLCONV
3534createTagString(
3535    const char* lang,
3536    int32_t langLength,
3537    const char* script,
3538    int32_t scriptLength,
3539    const char* region,
3540    int32_t regionLength,
3541    const char* trailing,
3542    int32_t trailingLength,
3543    char* tag,
3544    int32_t tagCapacity,
3545    UErrorCode* err)
3546{
3547    return createTagStringWithAlternates(
3548                lang,
3549                langLength,
3550                script,
3551                scriptLength,
3552                region,
3553                regionLength,
3554                trailing,
3555                trailingLength,
3556                NULL,
3557                tag,
3558                tagCapacity,
3559                err);
3560}
3561
3562/**
3563 * Parse the language, script, and region subtags from a tag string, and copy the
3564 * results into the corresponding output parameters. The buffers are null-terminated,
3565 * unless overflow occurs.
3566 *
3567 * The langLength, scriptLength, and regionLength parameters are input/output
3568 * parameters, and must contain the capacity of their corresponding buffers on
3569 * input.  On output, they will contain the actual length of the buffers, not
3570 * including the null terminator.
3571 *
3572 * If the length of any of the output subtags exceeds the capacity of the corresponding
3573 * buffer, the function copies as many bytes to the output buffer as it can, and returns
3574 * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
3575 * occurs.
3576 *
3577 * If an illegal argument is provided, the function returns the error
3578 * U_ILLEGAL_ARGUMENT_ERROR.
3579 *
3580 * @param localeID The locale ID to parse.
3581 * @param lang The language tag buffer.
3582 * @param langLength The length of the language tag.
3583 * @param script The script tag buffer.
3584 * @param scriptLength The length of the script tag.
3585 * @param region The region tag buffer.
3586 * @param regionLength The length of the region tag.
3587 * @param err A pointer to a UErrorCode for error reporting.
3588 * @return The number of chars of the localeID parameter consumed.
3589 **/
3590static int32_t U_CALLCONV
3591parseTagString(
3592    const char* localeID,
3593    char* lang,
3594    int32_t* langLength,
3595    char* script,
3596    int32_t* scriptLength,
3597    char* region,
3598    int32_t* regionLength,
3599    UErrorCode* err)
3600{
3601    const char* position = localeID;
3602    int32_t subtagLength = 0;
3603
3604    if(U_FAILURE(*err) ||
3605       localeID == NULL ||
3606       lang == NULL ||
3607       langLength == NULL ||
3608       script == NULL ||
3609       scriptLength == NULL ||
3610       region == NULL ||
3611       regionLength == NULL) {
3612        goto error;
3613    }
3614
3615    subtagLength = _getLanguage(position, lang, *langLength, &position);
3616    u_terminateChars(lang, *langLength, subtagLength, err);
3617
3618    /*
3619     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
3620     * to be an error, because it indicates the user-supplied tag is
3621     * not well-formed.
3622     */
3623    if(U_FAILURE(*err)) {
3624        goto error;
3625    }
3626
3627    *langLength = subtagLength;
3628
3629    /*
3630     * If no language was present, use the value of unknownLanguage
3631     * instead.  Otherwise, move past any separator.
3632     */
3633    if (*langLength == 0) {
3634        uprv_strcpy(
3635            lang,
3636            unknownLanguage);
3637        *langLength = uprv_strlen(lang);
3638    }
3639    else if (_isIDSeparator(*position)) {
3640        ++position;
3641    }
3642
3643    subtagLength = _getScript(position, script, *scriptLength, &position);
3644    u_terminateChars(script, *scriptLength, subtagLength, err);
3645
3646    if(U_FAILURE(*err)) {
3647        goto error;
3648    }
3649
3650    *scriptLength = subtagLength;
3651
3652    if (*scriptLength > 0) {
3653        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
3654            /**
3655             * If the script part is the "unknown" script, then don't return it.
3656             **/
3657            *scriptLength = 0;
3658        }
3659
3660        /*
3661         * Move past any separator.
3662         */
3663        if (_isIDSeparator(*position)) {
3664            ++position;
3665        }
3666    }
3667
3668    subtagLength = _getCountry(position, region, *regionLength, &position);
3669    u_terminateChars(region, *regionLength, subtagLength, err);
3670
3671    if(U_FAILURE(*err)) {
3672        goto error;
3673    }
3674
3675    *regionLength = subtagLength;
3676
3677    if (*regionLength > 0) {
3678        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
3679            /**
3680             * If the region part is the "unknown" region, then don't return it.
3681             **/
3682            *regionLength = 0;
3683        }
3684    }
3685
3686exit:
3687
3688    return (int32_t)(position - localeID);
3689
3690error:
3691
3692    /**
3693     * If we get here, we have no explicit error, it's the result of an
3694     * illegal argument.
3695     **/
3696    if (!U_FAILURE(*err)) {
3697        *err = U_ILLEGAL_ARGUMENT_ERROR;
3698    }
3699
3700    goto exit;
3701}
3702
3703static int32_t U_CALLCONV
3704createLikelySubtagsString(
3705    const char* lang,
3706    int32_t langLength,
3707    const char* script,
3708    int32_t scriptLength,
3709    const char* region,
3710    int32_t regionLength,
3711    const char* variants,
3712    int32_t variantsLength,
3713    char* tag,
3714    int32_t tagCapacity,
3715    UErrorCode* err)
3716{
3717    /**
3718     * ULOC_FULLNAME_CAPACITY will provide enough capacity
3719     * that we can build a string that contains the language,
3720     * script and region code without worrying about overrunning
3721     * the user-supplied buffer.
3722     **/
3723    char tagBuffer[ULOC_FULLNAME_CAPACITY];
3724    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
3725    int32_t tagBufferLength = 0;
3726
3727    if(U_FAILURE(*err)) {
3728        goto error;
3729    }
3730
3731    /**
3732     * Try the language with the script and region first.
3733     **/
3734    if (scriptLength > 0 && regionLength > 0) {
3735
3736        const char* likelySubtags = NULL;
3737
3738        tagBufferLength = createTagString(
3739            lang,
3740            langLength,
3741            script,
3742            scriptLength,
3743            region,
3744            regionLength,
3745            NULL,
3746            0,
3747            tagBuffer,
3748            sizeof(tagBuffer),
3749            err);
3750        if(U_FAILURE(*err)) {
3751            goto error;
3752        }
3753
3754        likelySubtags =
3755            findLikelySubtags(
3756                tagBuffer,
3757                likelySubtagsBuffer,
3758                sizeof(likelySubtagsBuffer),
3759                err);
3760        if(U_FAILURE(*err)) {
3761            goto error;
3762        }
3763
3764        if (likelySubtags != NULL) {
3765            /* Always use the language tag from the
3766               maximal string, since it may be more
3767               specific than the one provided. */
3768            return createTagStringWithAlternates(
3769                        NULL,
3770                        0,
3771                        NULL,
3772                        0,
3773                        NULL,
3774                        0,
3775                        variants,
3776                        variantsLength,
3777                        likelySubtags,
3778                        tag,
3779                        tagCapacity,
3780                        err);
3781        }
3782    }
3783
3784    /**
3785     * Try the language with just the script.
3786     **/
3787    if (scriptLength > 0) {
3788
3789        const char* likelySubtags = NULL;
3790
3791        tagBufferLength = createTagString(
3792            lang,
3793            langLength,
3794            script,
3795            scriptLength,
3796            NULL,
3797            0,
3798            NULL,
3799            0,
3800            tagBuffer,
3801            sizeof(tagBuffer),
3802            err);
3803        if(U_FAILURE(*err)) {
3804            goto error;
3805        }
3806
3807        likelySubtags =
3808            findLikelySubtags(
3809                tagBuffer,
3810                likelySubtagsBuffer,
3811                sizeof(likelySubtagsBuffer),
3812                err);
3813        if(U_FAILURE(*err)) {
3814            goto error;
3815        }
3816
3817        if (likelySubtags != NULL) {
3818            /* Always use the language tag from the
3819               maximal string, since it may be more
3820               specific than the one provided. */
3821            return createTagStringWithAlternates(
3822                        NULL,
3823                        0,
3824                        NULL,
3825                        0,
3826                        region,
3827                        regionLength,
3828                        variants,
3829                        variantsLength,
3830                        likelySubtags,
3831                        tag,
3832                        tagCapacity,
3833                        err);
3834        }
3835    }
3836
3837    /**
3838     * Try the language with just the region.
3839     **/
3840    if (regionLength > 0) {
3841
3842        const char* likelySubtags = NULL;
3843
3844        createTagString(
3845            lang,
3846            langLength,
3847            NULL,
3848            0,
3849            region,
3850            regionLength,
3851            NULL,
3852            0,
3853            tagBuffer,
3854            sizeof(tagBuffer),
3855            err);
3856        if(U_FAILURE(*err)) {
3857            goto error;
3858        }
3859
3860        likelySubtags =
3861            findLikelySubtags(
3862                tagBuffer,
3863                likelySubtagsBuffer,
3864                sizeof(likelySubtagsBuffer),
3865                err);
3866        if(U_FAILURE(*err)) {
3867            goto error;
3868        }
3869
3870        if (likelySubtags != NULL) {
3871            /* Always use the language tag from the
3872               maximal string, since it may be more
3873               specific than the one provided. */
3874            return createTagStringWithAlternates(
3875                        NULL,
3876                        0,
3877                        script,
3878                        scriptLength,
3879                        NULL,
3880                        0,
3881                        variants,
3882                        variantsLength,
3883                        likelySubtags,
3884                        tag,
3885                        tagCapacity,
3886                        err);
3887        }
3888    }
3889
3890    /**
3891     * Finally, try just the language.
3892     **/
3893    {
3894        const char* likelySubtags = NULL;
3895
3896        createTagString(
3897            lang,
3898            langLength,
3899            NULL,
3900            0,
3901            NULL,
3902            0,
3903            NULL,
3904            0,
3905            tagBuffer,
3906            sizeof(tagBuffer),
3907            err);
3908        if(U_FAILURE(*err)) {
3909            goto error;
3910        }
3911
3912        likelySubtags =
3913            findLikelySubtags(
3914                tagBuffer,
3915                likelySubtagsBuffer,
3916                sizeof(likelySubtagsBuffer),
3917                err);
3918        if(U_FAILURE(*err)) {
3919            goto error;
3920        }
3921
3922        if (likelySubtags != NULL) {
3923            /* Always use the language tag from the
3924               maximal string, since it may be more
3925               specific than the one provided. */
3926            return createTagStringWithAlternates(
3927                        NULL,
3928                        0,
3929                        script,
3930                        scriptLength,
3931                        region,
3932                        regionLength,
3933                        variants,
3934                        variantsLength,
3935                        likelySubtags,
3936                        tag,
3937                        tagCapacity,
3938                        err);
3939        }
3940    }
3941
3942    return u_terminateChars(
3943                tag,
3944                tagCapacity,
3945                0,
3946                err);
3947
3948error:
3949
3950    if (!U_FAILURE(*err)) {
3951        *err = U_ILLEGAL_ARGUMENT_ERROR;
3952    }
3953
3954    return -1;
3955}
3956
3957static int32_t
3958_uloc_addLikelySubtags(const char*    localeID,
3959         char* maximizedLocaleID,
3960         int32_t maximizedLocaleIDCapacity,
3961         UErrorCode* err)
3962{
3963    char lang[ULOC_LANG_CAPACITY];
3964    int32_t langLength = sizeof(lang);
3965    char script[ULOC_SCRIPT_CAPACITY];
3966    int32_t scriptLength = sizeof(script);
3967    char region[ULOC_COUNTRY_CAPACITY];
3968    int32_t regionLength = sizeof(region);
3969    const char* trailing = "";
3970    int32_t trailingLength = 0;
3971    int32_t trailingIndex = 0;
3972    int32_t resultLength = 0;
3973
3974    if(U_FAILURE(*err)) {
3975        goto error;
3976    }
3977    else if (localeID == NULL ||
3978             maximizedLocaleID == NULL ||
3979             maximizedLocaleIDCapacity <= 0) {
3980        goto error;
3981    }
3982
3983    trailingIndex = parseTagString(
3984        localeID,
3985        lang,
3986        &langLength,
3987        script,
3988        &scriptLength,
3989        region,
3990        &regionLength,
3991        err);
3992    if(U_FAILURE(*err)) {
3993        /* Overflow indicates an illegal argument error */
3994        if (*err == U_BUFFER_OVERFLOW_ERROR) {
3995            *err = U_ILLEGAL_ARGUMENT_ERROR;
3996        }
3997
3998        goto error;
3999    }
4000
4001    /* Find the length of the trailing portion. */
4002    trailing = &localeID[trailingIndex];
4003    trailingLength = uprv_strlen(trailing);
4004
4005    resultLength =
4006        createLikelySubtagsString(
4007            lang,
4008            langLength,
4009            script,
4010            scriptLength,
4011            region,
4012            regionLength,
4013            trailing,
4014            trailingLength,
4015            maximizedLocaleID,
4016            maximizedLocaleIDCapacity,
4017            err);
4018
4019    if (resultLength == 0) {
4020        const int32_t localIDLength =
4021            uprv_strlen(localeID);
4022
4023        /*
4024         * If we get here, we need to return localeID.
4025         */
4026        uprv_memcpy(
4027            maximizedLocaleID,
4028            localeID,
4029            localIDLength <= maximizedLocaleIDCapacity ?
4030                localIDLength : maximizedLocaleIDCapacity);
4031
4032        resultLength =
4033            u_terminateChars(
4034                maximizedLocaleID,
4035                maximizedLocaleIDCapacity,
4036                localIDLength,
4037                err);
4038    }
4039
4040    return resultLength;
4041
4042error:
4043
4044    if (!U_FAILURE(*err)) {
4045        *err = U_ILLEGAL_ARGUMENT_ERROR;
4046    }
4047
4048    return -1;
4049}
4050
4051static int32_t
4052_uloc_minimizeSubtags(const char*    localeID,
4053         char* minimizedLocaleID,
4054         int32_t minimizedLocaleIDCapacity,
4055         UErrorCode* err)
4056{
4057    /**
4058     * ULOC_FULLNAME_CAPACITY will provide enough capacity
4059     * that we can build a string that contains the language,
4060     * script and region code without worrying about overrunning
4061     * the user-supplied buffer.
4062     **/
4063    char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
4064    int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
4065
4066    char lang[ULOC_LANG_CAPACITY];
4067    int32_t langLength = sizeof(lang);
4068    char script[ULOC_SCRIPT_CAPACITY];
4069    int32_t scriptLength = sizeof(script);
4070    char region[ULOC_COUNTRY_CAPACITY];
4071    int32_t regionLength = sizeof(region);
4072    const char* trailing = "";
4073    int32_t trailingLength = 0;
4074    int32_t trailingIndex = 0;
4075
4076    if(U_FAILURE(*err)) {
4077        goto error;
4078    }
4079    else if (localeID == NULL ||
4080             minimizedLocaleID == NULL ||
4081             minimizedLocaleIDCapacity <= 0) {
4082        goto error;
4083    }
4084
4085    trailingIndex =
4086        parseTagString(
4087            localeID,
4088            lang,
4089            &langLength,
4090            script,
4091            &scriptLength,
4092            region,
4093            &regionLength,
4094            err);
4095    if(U_FAILURE(*err)) {
4096
4097        /* Overflow indicates an illegal argument error */
4098        if (*err == U_BUFFER_OVERFLOW_ERROR) {
4099            *err = U_ILLEGAL_ARGUMENT_ERROR;
4100        }
4101
4102        goto error;
4103    }
4104
4105    /* Find the spot where the variants begin, if any. */
4106    trailing = &localeID[trailingIndex];
4107    trailingLength = uprv_strlen(trailing);
4108
4109    createTagString(
4110        lang,
4111        langLength,
4112        script,
4113        scriptLength,
4114        region,
4115        regionLength,
4116        NULL,
4117        0,
4118        maximizedTagBuffer,
4119        maximizedTagBufferLength,
4120        err);
4121    if(U_FAILURE(*err)) {
4122        goto error;
4123    }
4124
4125    /**
4126     * First, we need to first get the maximization
4127     * from AddLikelySubtags.
4128     **/
4129    maximizedTagBufferLength =
4130        uloc_addLikelySubtags(
4131            maximizedTagBuffer,
4132            maximizedTagBuffer,
4133            maximizedTagBufferLength,
4134            err);
4135
4136    if(U_FAILURE(*err)) {
4137        goto error;
4138    }
4139
4140    /**
4141     * Start first with just the language.
4142     **/
4143    {
4144        char tagBuffer[ULOC_FULLNAME_CAPACITY];
4145
4146        const int32_t tagBufferLength =
4147            createLikelySubtagsString(
4148                lang,
4149                langLength,
4150                NULL,
4151                0,
4152                NULL,
4153                0,
4154                NULL,
4155                0,
4156                tagBuffer,
4157                sizeof(tagBuffer),
4158                err);
4159
4160        if(U_FAILURE(*err)) {
4161            goto error;
4162        }
4163        else if (uprv_strnicmp(
4164                    maximizedTagBuffer,
4165                    tagBuffer,
4166                    tagBufferLength) == 0) {
4167
4168            return createTagString(
4169                        lang,
4170                        langLength,
4171                        NULL,
4172                        0,
4173                        NULL,
4174                        0,
4175                        trailing,
4176                        trailingLength,
4177                        minimizedLocaleID,
4178                        minimizedLocaleIDCapacity,
4179                        err);
4180        }
4181    }
4182
4183    /**
4184     * Next, try the language and region.
4185     **/
4186    if (regionLength > 0) {
4187
4188        char tagBuffer[ULOC_FULLNAME_CAPACITY];
4189
4190        const int32_t tagBufferLength =
4191            createLikelySubtagsString(
4192                lang,
4193                langLength,
4194                NULL,
4195                0,
4196                region,
4197                regionLength,
4198                NULL,
4199                0,
4200                tagBuffer,
4201                sizeof(tagBuffer),
4202                err);
4203
4204        if(U_FAILURE(*err)) {
4205            goto error;
4206        }
4207        else if (uprv_strnicmp(
4208                    maximizedTagBuffer,
4209                    tagBuffer,
4210                    tagBufferLength) == 0) {
4211
4212            return createTagString(
4213                        lang,
4214                        langLength,
4215                        NULL,
4216                        0,
4217                        region,
4218                        regionLength,
4219                        trailing,
4220                        trailingLength,
4221                        minimizedLocaleID,
4222                        minimizedLocaleIDCapacity,
4223                        err);
4224        }
4225    }
4226
4227    /**
4228     * Finally, try the language and script.  This is our last chance,
4229     * since trying with all three subtags would only yield the
4230     * maximal version that we already have.
4231     **/
4232    if (scriptLength > 0 && regionLength > 0) {
4233        char tagBuffer[ULOC_FULLNAME_CAPACITY];
4234
4235        const int32_t tagBufferLength =
4236            createLikelySubtagsString(
4237                lang,
4238                langLength,
4239                script,
4240                scriptLength,
4241                NULL,
4242                0,
4243                NULL,
4244                0,
4245                tagBuffer,
4246                sizeof(tagBuffer),
4247                err);
4248
4249        if(U_FAILURE(*err)) {
4250            goto error;
4251        }
4252        else if (uprv_strnicmp(
4253                    maximizedTagBuffer,
4254                    tagBuffer,
4255                    tagBufferLength) == 0) {
4256
4257            return createTagString(
4258                        lang,
4259                        langLength,
4260                        script,
4261                        scriptLength,
4262                        NULL,
4263                        0,
4264                        trailing,
4265                        trailingLength,
4266                        minimizedLocaleID,
4267                        minimizedLocaleIDCapacity,
4268                        err);
4269        }
4270    }
4271
4272    {
4273        /**
4274         * If we got here, return the locale ID parameter.
4275         **/
4276        const int32_t localeIDLength = uprv_strlen(localeID);
4277
4278        uprv_memcpy(
4279            minimizedLocaleID,
4280            localeID,
4281            localeIDLength <= minimizedLocaleIDCapacity ?
4282                localeIDLength : minimizedLocaleIDCapacity);
4283
4284        return u_terminateChars(
4285                    minimizedLocaleID,
4286                    minimizedLocaleIDCapacity,
4287                    localeIDLength,
4288                    err);
4289    }
4290
4291error:
4292
4293    if (!U_FAILURE(*err)) {
4294        *err = U_ILLEGAL_ARGUMENT_ERROR;
4295    }
4296
4297    return -1;
4298
4299
4300}
4301
4302static UBool
4303do_canonicalize(const char*    localeID,
4304         char* buffer,
4305         int32_t bufferCapacity,
4306         UErrorCode* err)
4307{
4308    uloc_canonicalize(
4309        localeID,
4310        buffer,
4311        bufferCapacity,
4312        err);
4313
4314    if (*err == U_STRING_NOT_TERMINATED_WARNING ||
4315        *err == U_BUFFER_OVERFLOW_ERROR) {
4316        *err = U_ILLEGAL_ARGUMENT_ERROR;
4317
4318        return FALSE;
4319    }
4320    else if (U_FAILURE(*err)) {
4321
4322        return FALSE;
4323    }
4324    else {
4325        return TRUE;
4326    }
4327}
4328
4329U_DRAFT int32_t U_EXPORT2
4330uloc_addLikelySubtags(const char*    localeID,
4331         char* maximizedLocaleID,
4332         int32_t maximizedLocaleIDCapacity,
4333         UErrorCode* err)
4334{
4335    char localeBuffer[ULOC_FULLNAME_CAPACITY];
4336
4337    if (!do_canonicalize(
4338        localeID,
4339        localeBuffer,
4340        sizeof(localeBuffer),
4341        err)) {
4342        return -1;
4343    }
4344    else {
4345        return _uloc_addLikelySubtags(
4346                    localeBuffer,
4347                    maximizedLocaleID,
4348                    maximizedLocaleIDCapacity,
4349                    err);
4350    }
4351}
4352
4353U_DRAFT int32_t U_EXPORT2
4354uloc_minimizeSubtags(const char*    localeID,
4355         char* minimizedLocaleID,
4356         int32_t minimizedLocaleIDCapacity,
4357         UErrorCode* err)
4358{
4359    char localeBuffer[ULOC_FULLNAME_CAPACITY];
4360
4361    if (!do_canonicalize(
4362        localeID,
4363        localeBuffer,
4364        sizeof(localeBuffer),
4365        err)) {
4366        return -1;
4367    }
4368    else {
4369        return _uloc_minimizeSubtags(
4370                    localeBuffer,
4371                    minimizedLocaleID,
4372                    minimizedLocaleIDCapacity,
4373                    err);
4374    }
4375}
4376
4377/*eof*/
4378