1/* 2********************************************************************** 3* Copyright (C) 1997-2009, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* 7* File ULOC.CPP 8* 9* Modification History: 10* 11* Date Name Description 12* 04/01/97 aliu Creation. 13* 08/21/98 stephen JDK 1.2 sync 14* 12/08/98 rtg New Locale implementation and C API 15* 03/15/99 damiba overhaul. 16* 04/06/99 stephen changed setDefault() to realloc and copy 17* 06/14/99 stephen Changed calls to ures_open for new params 18* 07/21/99 stephen Modified setDefault() to propagate to C++ 19* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 20* brought canonicalization code into line with spec 21*****************************************************************************/ 22 23/* 24 POSIX's locale format, from putil.c: [no spaces] 25 26 ll [ _CC ] [ . MM ] [ @ VV] 27 28 l = lang, C = ctry, M = charmap, V = variant 29*/ 30 31#include "unicode/utypes.h" 32#include "unicode/ustring.h" 33#include "unicode/uloc.h" 34#include "unicode/ures.h" 35 36#include "putilimp.h" 37#include "ustr_imp.h" 38#include "ulocimp.h" 39#include "uresimp.h" 40#include "umutex.h" 41#include "cstring.h" 42#include "cmemory.h" 43#include "ucln_cmn.h" 44#include "locmap.h" 45#include "uarrsort.h" 46#include "uenumimp.h" 47#include "uassert.h" 48 49#include <stdio.h> /* for sprintf */ 50 51/* ### Declarations **************************************************/ 52 53/* Locale stuff from locid.cpp */ 54U_CFUNC void locale_set_default(const char *id); 55U_CFUNC const char *locale_get_default(void); 56U_CFUNC int32_t 57locale_getKeywords(const char *localeID, 58 char prev, 59 char *keywords, int32_t keywordCapacity, 60 char *values, int32_t valuesCapacity, int32_t *valLen, 61 UBool valuesToo, 62 UErrorCode *status); 63 64/* ### Constants **************************************************/ 65 66/* These strings 
describe the resources we attempt to load from 67 the locale ResourceBundle data file.*/ 68static const char _kLanguages[] = "Languages"; 69static const char _kScripts[] = "Scripts"; 70static const char _kCountries[] = "Countries"; 71static const char _kVariants[] = "Variants"; 72static const char _kKeys[] = "Keys"; 73static const char _kTypes[] = "Types"; 74static const char _kIndexLocaleName[] = "res_index"; 75static const char _kRootName[] = "root"; 76static const char _kIndexTag[] = "InstalledLocales"; 77static const char _kCurrency[] = "currency"; 78static const char _kCurrencies[] = "Currencies"; 79static char** _installedLocales = NULL; 80static int32_t _installedLocalesCount = 0; 81 82/* ### Data tables **************************************************/ 83 84/** 85 * Table of language codes, both 2- and 3-letter, with preference 86 * given to 2-letter codes where possible. Includes 3-letter codes 87 * that lack a 2-letter equivalent. 88 * 89 * This list must be in sorted order. This list is returned directly 90 * to the user by some API. 91 * 92 * This list must be kept in sync with LANGUAGES_3, with corresponding 93 * entries matched. 94 * 95 * This table should be terminated with a NULL entry, followed by a 96 * second list, and another NULL entry. The first list is visible to 97 * user code when this array is returned by API. The second list 98 * contains codes we support, but do not expose through user API. 99 * 100 * Notes 101 * 102 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 103 * include the revisions up to 2001/7/27 *CWB* 104 * 105 * The 3 character codes are the terminology codes like RFC 3066. This 106 * is compatible with prior ICU codes 107 * 108 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the 109 * table but now at the end of the table because 3 character codes are 110 * duplicates. This avoids bad searches going from 3 to 2 character 111 * codes. 
112 * 113 * The range qaa-qtz is reserved for local use 114 */ 115static const char * const LANGUAGES[] = { 116 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", 117 "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", 118 "ang", "anp", "apa", 119 "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", 120 "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", 121 "bai", "bal", "ban", "bas", "bat", "be", "bej", 122 "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", 123 "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", 124 "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", 125 "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", 126 "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", 127 "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", 128 "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", 129 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", 130 "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", 131 "enm", "eo", "es", "et", "eu", "ewo", "fa", 132 "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", 133 "fr", "frm", "fro", "frr", "frs", "fur", "fy", 134 "ga", "gaa", "gay", "gba", "gd", "gem", "gez", "gil", 135 "gl", "gmh", "gn", "goh", "gon", "gor", "got", "grb", 136 "grc", "gsw", "gu", "gv", "gwi", 137 "ha", "hai", "haw", "he", "hi", "hil", "him", 138 "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", 139 "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", 140 "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", 141 "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", 142 "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", 143 "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", 144 "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", 145 "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", 146 "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", 147 "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", 148 "lv", "mad", 
"mag", "mai", "mak", "man", "map", "mas", 149 "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", 150 "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", 151 "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", 152 "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", 153 "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", 154 "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", 155 "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", 156 "om", "or", "os", "osa", "ota", "oto", "pa", "paa", 157 "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", 158 "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", 159 "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", 160 "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", 161 "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", 162 "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", 163 "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", 164 "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", 165 "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", 166 "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", 167 "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", 168 "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", 169 "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", 170 "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", 171 "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", 172 "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", 173 "yi", "yo", "ypk", "za", "zap", "zbl", "zen", "zh", "znd", 174 "zu", "zun", "zxx", "zza", 175NULL, 176 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ 177NULL 178}; 179static const char* const DEPRECATED_LANGUAGES[]={ 180 "in", "iw", "ji", "jw", NULL, NULL 181}; 182static const char* const REPLACEMENT_LANGUAGES[]={ 183 "id", "he", "yi", "jv", NULL, NULL 184}; 185 186/** 187 * Table of 3-letter language codes. 
188 * 189 * This is a lookup table used to convert 3-letter language codes to 190 * their 2-letter equivalent, where possible. It must be kept in sync 191 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the 192 * same language as LANGUAGES_3[i]. The commented-out lines are 193 * copied from LANGUAGES to make eyeballing this baby easier. 194 * 195 * Where a 3-letter language code has no 2-letter equivalent, the 196 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. 197 * 198 * This table should be terminated with a NULL entry, followed by a 199 * second list, and another NULL entry. The two lists correspond to 200 * the two lists in LANGUAGES. 201 */ 202static const char * const LANGUAGES_3[] = { 203/* "aa", "ab", "ace", "ach", "ada", "ady", "ae", "af", "afa", */ 204 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "afr", "afa", 205/* "afh", "ain", "ak", "akk", "ale", "alg", "alt", "am", "an", "ang", "anp", "apa", */ 206 "afh", "ain", "aka", "akk", "ale", "alg", "alt", "amh", "arg", "ang", "anp", "apa", 207/* "ar", "arc", "arn", "arp", "art", "arw", "as", "ast", */ 208 "ara", "arc", "arn", "arp", "art", "arw", "asm", "ast", 209/* "ath", "aus", "av", "awa", "ay", "az", "ba", "bad", */ 210 "ath", "aus", "ava", "awa", "aym", "aze", "bak", "bad", 211/* "bai", "bal", "ban", "bas", "bat", "be", "bej", */ 212 "bai", "bal", "ban", "bas", "bat", "bel", "bej", 213/* "bem", "ber", "bg", "bh", "bho", "bi", "bik", "bin", */ 214 "bem", "ber", "bul", "bih", "bho", "bis", "bik", "bin", 215/* "bla", "bm", "bn", "bnt", "bo", "br", "bra", "bs", */ 216 "bla", "bam", "ben", "bnt", "bod", "bre", "bra", "bos", 217/* "btk", "bua", "bug", "byn", "ca", "cad", "cai", "car", "cau", */ 218 "btk", "bua", "bug", "byn", "cat", "cad", "cai", "car", "cau", 219/* "cch", "ce", "ceb", "cel", "ch", "chb", "chg", "chk", "chm", */ 220 "cch", "che", "ceb", "cel", "cha", "chb", "chg", "chk", "chm", 221/* "chn", "cho", "chp", "chr", "chy", "cmc", "co", "cop", */ 222 "chn", "cho", 
"chp", "chr", "chy", "cmc", "cos", "cop", 223/* "cpe", "cpf", "cpp", "cr", "crh", "crp", "cs", "csb", "cu", "cus", */ 224 "cpe", "cpf", "cpp", "cre", "crh", "crp", "ces", "csb", "chu", "cus", 225/* "cv", "cy", "da", "dak", "dar", "day", "de", "del", "den", */ 226 "chv", "cym", "dan", "dak", "dar", "day", "deu", "del", "den", 227/* "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "dv", "dyu", */ 228 "dgr", "din", "doi", "dra", "dsb", "dua", "dum", "div", "dyu", 229/* "dz", "ee", "efi", "egy", "eka", "el", "elx", "en", */ 230 "dzo", "ewe", "efi", "egy", "eka", "ell", "elx", "eng", 231/* "enm", "eo", "es", "et", "eu", "ewo", "fa", */ 232 "enm", "epo", "spa", "est", "eus", "ewo", "fas", 233/* "fan", "fat", "ff", "fi", "fil", "fiu", "fj", "fo", "fon", */ 234 "fan", "fat", "ful", "fin", "fil", "fiu", "fij", "fao", "fon", 235/* "fr", "frm", "fro", "frr", "frs", "fur", "fy", "ga", "gaa", "gay", */ 236 "fra", "frm", "fro", "frr", "frs", "fur", "fry", "gle", "gaa", "gay", 237/* "gba", "gd", "gem", "gez", "gil", "gl", "gmh", "gn", */ 238 "gba", "gla", "gem", "gez", "gil", "glg", "gmh", "grn", 239/* "goh", "gon", "gor", "got", "grb", "grc", "gsw", "gu", "gv", */ 240 "goh", "gon", "gor", "got", "grb", "grc", "gsw", "guj", "glv", 241/* "gwi", "ha", "hai", "haw", "he", "hi", "hil", "him", */ 242 "gwi", "hau", "hai", "haw", "heb", "hin", "hil", "him", 243/* "hit", "hmn", "ho", "hr", "hsb", "ht", "hu", "hup", "hy", "hz", */ 244 "hit", "hmn", "hmo", "hrv", "hsb", "hat", "hun", "hup", "hye", "her", 245/* "ia", "iba", "id", "ie", "ig", "ii", "ijo", "ik", */ 246 "ina", "iba", "ind", "ile", "ibo", "iii", "ijo", "ipk", 247/* "ilo", "inc", "ine", "inh", "io", "ira", "iro", "is", "it", */ 248 "ilo", "inc", "ine", "inh", "ido", "ira", "iro", "isl", "ita", 249/* "iu", "ja", "jbo", "jpr", "jrb", "jv", "ka", "kaa", "kab", */ 250 "iku", "jpn", "jbo", "jpr", "jrb", "jav", "kat", "kaa", "kab", 251/* "kac", "kaj", "kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi",*/ 252 "kac", "kaj", 
"kam", "kar", "kaw", "kbd", "kcg", "kfo", "kg", "kha", "khi", 253/* "kho", "ki", "kj", "kk", "kl", "km", "kmb", "kn", */ 254 "kho", "kik", "kua", "kaz", "kal", "khm", "kmb", "kan", 255/* "ko", "kok", "kos", "kpe", "kr", "krc", "krl", "kro", "kru", "ks", */ 256 "kor", "kok", "kos", "kpe", "kau", "krc", "krl", "kro", "kru", "kas", 257/* "ku", "kum", "kut", "kv", "kw", "ky", "la", "lad", */ 258 "kur", "kum", "kut", "kom", "cor", "kir", "lat", "lad", 259/* "lah", "lam", "lb", "lez", "lg", "li", "ln", "lo", "lol", */ 260 "lah", "lam", "ltz", "lez", "lug", "lim", "lin", "lao", "lol", 261/* "loz", "lt", "lu", "lua", "lui", "lun", "luo", "lus", */ 262 "loz", "lit", "lub", "lua", "lui", "lun", "luo", "lus", 263/* "lv", "mad", "mag", "mai", "mak", "man", "map", "mas", */ 264 "lav", "mad", "mag", "mai", "mak", "man", "map", "mas", 265/* "mdf", "mdr", "men", "mfe", "mg", "mga", "mh", "mi", "mic", "min", */ 266 "mdf", "mdr", "men", "mfe", "mlg", "mga", "mah", "mri", "mic", "min", 267/* "mis", "mk", "mkh", "ml", "mn", "mnc", "mni", "mno", */ 268 "mis", "mkd", "mkh", "mal", "mon", "mnc", "mni", "mno", 269/* "mo", "moh", "mos", "mr", "ms", "mt", "mul", "mun", */ 270 "mol", "moh", "mos", "mar", "msa", "mlt", "mul", "mun", 271/* "mus", "mwl", "mwr", "my", "myn", "myv", "na", "nah", "nai", "nap", */ 272 "mus", "mwl", "mwr", "mya", "myn", "myv", "nau", "nah", "nai", "nap", 273/* "nb", "nd", "nds", "ne", "new", "ng", "nia", "nic", */ 274 "nob", "nde", "nds", "nep", "new", "ndo", "nia", "nic", 275/* "niu", "nl", "nn", "no", "nog", "non", "nqo", "nr", "nso", "nub", */ 276 "niu", "nld", "nno", "nor", "nog", "non", "nqo", "nbl", "nso", "nub", 277/* "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", "oc", "oj", */ 278 "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", "oci", "oji", 279/* "om", "or", "os", "osa", "ota", "oto", "pa", "paa", */ 280 "orm", "ori", "oss", "osa", "ota", "oto", "pan", "paa", 281/* "pag", "pal", "pam", "pap", "pau", "peo", "phi", "phn", */ 282 "pag", "pal", "pam", "pap", 
"pau", "peo", "phi", "phn", 283/* "pi", "pl", "pon", "pra", "pro", "ps", "pt", "qu", */ 284 "pli", "pol", "pon", "pra", "pro", "pus", "por", "que", 285/* "raj", "rap", "rar", "rm", "rn", "ro", "roa", "rom", */ 286 "raj", "rap", "rar", "roh", "run", "ron", "roa", "rom", 287/* "ru", "rup", "rw", "sa", "sad", "sah", "sai", "sal", "sam", */ 288 "rus", "rup", "kin", "san", "sad", "sah", "sai", "sal", "sam", 289/* "sas", "sat", "sc", "scn", "sco", "sd", "se", "sel", "sem", */ 290 "sas", "sat", "srd", "scn", "sco", "snd", "sme", "sel", "sem", 291/* "sg", "sga", "sgn", "shn", "si", "sid", "sio", "sit", */ 292 "sag", "sga", "sgn", "shn", "sin", "sid", "sio", "sit", 293/* "sk", "sl", "sla", "sm", "sma", "smi", "smj", "smn", */ 294 "slk", "slv", "sla", "smo", "sma", "smi", "smj", "smn", 295/* "sms", "sn", "snk", "so", "sog", "son", "sq", "sr", */ 296 "sms", "sna", "snk", "som", "sog", "son", "sqi", "srp", 297/* "srn", "srr", "ss", "ssa", "st", "su", "suk", "sus", "sux", */ 298 "srn", "srr", "ssw", "ssa", "sot", "sun", "suk", "sus", "sux", 299/* "sv", "sw", "syc", "syr", "ta", "tai", "te", "tem", "ter", */ 300 "swe", "swa", "syc", "syr", "tam", "tai", "tel", "tem", "ter", 301/* "tet", "tg", "th", "ti", "tig", "tiv", "tk", "tkl", */ 302 "tet", "tgk", "tha", "tir", "tig", "tiv", "tuk", "tkl", 303/* "tl", "tlh", "tli", "tmh", "tn", "to", "tog", "tpi", "tr", "trv", */ 304 "tgl", "tlh", "tli", "tmh", "tsn", "ton", "tog", "tpi", "tur", "trv", 305/* "ts", "tsi", "tt", "tum", "tup", "tut", "tvl", "tw", */ 306 "tso", "tsi", "tat", "tum", "tup", "tut", "tvl", "twi", 307/* "ty", "tyv", "udm", "ug", "uga", "uk", "umb", "und", "ur", */ 308 "tah", "tyv", "udm", "uig", "uga", "ukr", "umb", "und", "urd", 309/* "uz", "vai", "ve", "vi", "vo", "vot", "wa", "wak", */ 310 "uzb", "vai", "ven", "vie", "vol", "vot", "wln", "wak", 311/* "wal", "war", "was", "wen", "wo", "xal", "xh", "yao", "yap", */ 312 "wal", "war", "was", "wen", "wol", "xal", "xho", "yao", "yap", 313/* "yi", "yo", "ypk", "za", 
"zap", "zbl", "zen", "zh", "znd", */ 314 "yid", "yor", "ypk", "zha", "zap", "zbl", "zen", "zho", "znd", 315/* "zu", "zun", "zxx", "zza", */ 316 "zul", "zun", "zxx", "zza", 317NULL, 318/* "in", "iw", "ji", "jw", "sh", */ 319 "ind", "heb", "yid", "jaw", "srp", 320NULL 321}; 322 323/** 324 * Table of 2-letter country codes. 325 * 326 * This list must be in sorted order. This list is returned directly 327 * to the user by some API. 328 * 329 * This list must be kept in sync with COUNTRIES_3, with corresponding 330 * entries matched. 331 * 332 * This table should be terminated with a NULL entry, followed by a 333 * second list, and another NULL entry. The first list is visible to 334 * user code when this array is returned by API. The second list 335 * contains codes we support, but do not expose through user API. 336 * 337 * Notes: 338 * 339 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per 340 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added 341 * new codes keeping the old ones for compatibility updated to include 342 * 1999/12/03 revisions *CWB* 343 * 344 * RO(ROM) is now RO(ROU) according to 345 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html 346 */ 347static const char * const COUNTRIES[] = { 348 "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", 349 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", 350 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", 351 "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", 352 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", 353 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", 354 "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", 355 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", 356 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", 357 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", 358 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", 359 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", 360 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", 361 "IT", "JE", "JM", 
"JO", "JP", "KE", "KG", "KH", "KI", 362 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", 363 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", 364 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 365 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", 366 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", 367 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", 368 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", 369 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", 370 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", 371 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", 372 "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", 373 "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", 374 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", 375 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", 376 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", 377 "WS", "YE", "YT", "ZA", "ZM", "ZW", 378NULL, 379 "FX", "CS", "RO", "TP", "YU", "ZR", /* obsolete country codes */ 380NULL 381}; 382 383static const char* const DEPRECATED_COUNTRIES[] ={ 384 "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR", NULL, NULL /* deprecated country list */ 385}; 386static const char* const REPLACEMENT_COUNTRIES[] = { 387/* "BU", "CS", "DY", "FX", "HV", "NH", "RH", "TP", "YU", "ZR" */ 388 "MM", "RS", "BJ", "FR", "BF", "VU", "ZW", "TL", "RS", "CD", NULL, NULL /* replacement country codes */ 389}; 390 391/** 392 * Table of 3-letter country codes. 393 * 394 * This is a lookup table used to convert 3-letter country codes to 395 * their 2-letter equivalent. It must be kept in sync with COUNTRIES. 396 * For all valid i, COUNTRIES[i] must refer to the same country as 397 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES 398 * to make eyeballing this baby easier. 399 * 400 * This table should be terminated with a NULL entry, followed by a 401 * second list, and another NULL entry. The two lists correspond to 402 * the two lists in COUNTRIES. 
403 */ 404static const char * const COUNTRIES_3[] = { 405/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", "AN", */ 406 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", "ANT", 407/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 408 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 409/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 410 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 411/* "BJ", "BL", "BM", "BN", "BO", "BR", "BS", "BT", "BV", */ 412 "BEN", "BLM", "BMU", "BRN", "BOL", "BRA", "BHS", "BTN", "BVT", 413/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 414 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 415/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 416 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 417/* "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 418 "CUB", "CPV", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 419/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 420 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 421/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 422 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 423/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 424 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 425/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 426 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 427/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 428 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 429/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 430 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 431/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 432 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 433/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 434 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 435/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 436 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", 437/* 
"LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 438 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 439/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 440 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 441/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 442 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 443/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 444 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 445/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 446 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 447/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 448 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 449/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 450 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 451/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 452 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 453/* "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", */ 454 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "STP", "SLV", 455/* "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 456 "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 457/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 458 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 459/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 460 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 461/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 462 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 463/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 464 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 465NULL, 466/* "FX", "CS", "RO", "TP", "YU", "ZR", */ 467 "FXX", "SCG", "ROM", "TMP", "YUG", "ZAR", 468NULL 469}; 470 471typedef struct CanonicalizationMap { 472 const char *id; /* input ID */ 473 const char *canonicalID; /* canonicalized output ID */ 474 const char *keyword; /* keyword, or NULL if none */ 475 const char *value; /* keyword value, or 
NULL if kw==NULL */ 476} CanonicalizationMap; 477 478/** 479 * A map to canonicalize locale IDs. This handles a variety of 480 * different semantic kinds of transformations. 481 */ 482static const CanonicalizationMap CANONICALIZE_MAP[] = { 483 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 484 { "C", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 485 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 486 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 487 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 488 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 489 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 490 { "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */ 491 { "de_1901", "de__1901", NULL, NULL }, /* registered name */ 492 { "de_1906", "de__1906", NULL, NULL }, /* registered name */ 493 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 494 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 495 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 496 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 497 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 498 { "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */ 499 { "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */ 500 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 501 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 502 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 503 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 504 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 505 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 506 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 507 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 508 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 509 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 510 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 511 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 512 { "it_IT_PREEURO", 
"it_IT", "currency", "ITL" }, 513 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 514 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 515 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 516 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 517 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 518 { "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */ 519 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 520 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 521 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 522 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 523 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 524 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 525 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 526 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 527 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 528 { "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */ 529 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 530 { "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */ 531 { "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */ 532 { "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */ 533 { "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */ 534 { "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */ 535 { "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */ 536}; 537 538typedef struct VariantMap { 539 const char *variant; /* input ID */ 540 const char *keyword; /* keyword, or NULL if none */ 541 const char *value; /* keyword value, or NULL if kw==NULL */ 542} VariantMap; 543 544static const VariantMap VARIANT_MAP[] = { 545 { "EURO", "currency", "EUR" }, 546 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 547 { "STROKE", "collation", "stroke" } /* Solaris variant */ 548}; 549 550/* ### Keywords 
**************************************************/ 551 552#define ULOC_KEYWORD_BUFFER_LEN 25 553#define ULOC_MAX_NO_KEYWORDS 25 554 555static const char * 556locale_getKeywordsStart(const char *localeID) { 557 const char *result = NULL; 558 if((result = uprv_strchr(localeID, '@')) != NULL) { 559 return result; 560 } 561#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 562 else { 563 /* We do this because the @ sign is variant, and the @ sign used on one 564 EBCDIC machine won't be compiled the same way on other EBCDIC based 565 machines. */ 566 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 567 const uint8_t *charToFind = ebcdicSigns; 568 while(*charToFind) { 569 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 570 return result; 571 } 572 charToFind++; 573 } 574 } 575#endif 576 return NULL; 577} 578 579/** 580 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 581 * @param keywordName incoming name to be canonicalized 582 * @param status return status (keyword too long) 583 * @return length of the keyword name 584 */ 585static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 586{ 587 int32_t i; 588 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 589 590 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 591 /* keyword name too long for internal buffer */ 592 *status = U_INTERNAL_PROGRAM_ERROR; 593 return 0; 594 } 595 596 /* normalize the keyword name */ 597 for(i = 0; i < keywordNameLen; i++) { 598 buf[i] = uprv_tolower(keywordName[i]); 599 } 600 buf[i] = 0; 601 602 return keywordNameLen; 603} 604 605typedef struct { 606 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 607 int32_t keywordLen; 608 const char *valueStart; 609 int32_t valueLen; 610} KeywordStruct; 611 612static int32_t U_CALLCONV 613compareKeywordStructs(const void *context, const void *left, const void *right) { 614 const char* leftString = ((const KeywordStruct *)left)->keyword; 615 const char* 
rightString = ((const KeywordStruct *)right)->keyword; 616 return uprv_strcmp(leftString, rightString); 617} 618 619/** 620 * Both addKeyword and addValue must already be in canonical form. 621 * Either both addKeyword and addValue are NULL, or neither is NULL. 622 * If they are not NULL they must be zero terminated. 623 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 624 */ 625static int32_t 626_getKeywords(const char *localeID, 627 char prev, 628 char *keywords, int32_t keywordCapacity, 629 char *values, int32_t valuesCapacity, int32_t *valLen, 630 UBool valuesToo, 631 const char* addKeyword, 632 const char* addValue, 633 UErrorCode *status) 634{ 635 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 636 637 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 638 int32_t numKeywords = 0; 639 const char* pos = localeID; 640 const char* equalSign = NULL; 641 const char* semicolon = NULL; 642 int32_t i = 0, j, n; 643 int32_t keywordsLen = 0; 644 int32_t valuesLen = 0; 645 646 if(prev == '@') { /* start of keyword definition */ 647 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 648 do { 649 UBool duplicate = FALSE; 650 /* skip leading spaces */ 651 while(*pos == ' ') { 652 pos++; 653 } 654 if (!*pos) { /* handle trailing "; " */ 655 break; 656 } 657 if(numKeywords == maxKeywords) { 658 *status = U_INTERNAL_PROGRAM_ERROR; 659 return 0; 660 } 661 equalSign = uprv_strchr(pos, '='); 662 semicolon = uprv_strchr(pos, ';'); 663 /* lack of '=' [foo@currency] is illegal */ 664 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 665 if(!equalSign || (semicolon && semicolon<equalSign)) { 666 *status = U_INVALID_FORMAT_ERROR; 667 return 0; 668 } 669 /* need to normalize both keyword and keyword name */ 670 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 671 /* keyword name too long for internal buffer */ 672 *status = U_INTERNAL_PROGRAM_ERROR; 673 return 0; 674 } 675 for(i = 0, n = 0; i < equalSign - 
pos; ++i) { 676 if (pos[i] != ' ') { 677 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 678 } 679 } 680 keywordList[numKeywords].keyword[n] = 0; 681 keywordList[numKeywords].keywordLen = n; 682 /* now grab the value part. First we skip the '=' */ 683 equalSign++; 684 /* then we leading spaces */ 685 while(*equalSign == ' ') { 686 equalSign++; 687 } 688 keywordList[numKeywords].valueStart = equalSign; 689 690 pos = semicolon; 691 i = 0; 692 if(pos) { 693 while(*(pos - i - 1) == ' ') { 694 i++; 695 } 696 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 697 pos++; 698 } else { 699 i = (int32_t)uprv_strlen(equalSign); 700 while(equalSign[i-1] == ' ') { 701 i--; 702 } 703 keywordList[numKeywords].valueLen = i; 704 } 705 /* If this is a duplicate keyword, then ignore it */ 706 for (j=0; j<numKeywords; ++j) { 707 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 708 duplicate = TRUE; 709 break; 710 } 711 } 712 if (!duplicate) { 713 ++numKeywords; 714 } 715 } while(pos); 716 717 /* Handle addKeyword/addValue. */ 718 if (addKeyword != NULL) { 719 UBool duplicate = FALSE; 720 U_ASSERT(addValue != NULL); 721 /* Search for duplicate; if found, do nothing. Explicit keyword 722 overrides addKeyword. 
*/ 723 for (j=0; j<numKeywords; ++j) { 724 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 725 duplicate = TRUE; 726 break; 727 } 728 } 729 if (!duplicate) { 730 if (numKeywords == maxKeywords) { 731 *status = U_INTERNAL_PROGRAM_ERROR; 732 return 0; 733 } 734 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 735 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 736 keywordList[numKeywords].valueStart = addValue; 737 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 738 ++numKeywords; 739 } 740 } else { 741 U_ASSERT(addValue == NULL); 742 } 743 744 /* now we have a list of keywords */ 745 /* we need to sort it */ 746 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 747 748 /* Now construct the keyword part */ 749 for(i = 0; i < numKeywords; i++) { 750 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 751 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 752 if(valuesToo) { 753 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 754 } else { 755 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 756 } 757 } 758 keywordsLen += keywordList[i].keywordLen + 1; 759 if(valuesToo) { 760 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 761 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 762 } 763 keywordsLen += keywordList[i].valueLen; 764 765 if(i < numKeywords - 1) { 766 if(keywordsLen < keywordCapacity) { 767 keywords[keywordsLen] = ';'; 768 } 769 keywordsLen++; 770 } 771 } 772 if(values) { 773 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 774 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 775 values[valuesLen + keywordList[i].valueLen] = 0; 776 } 777 valuesLen += keywordList[i].valueLen + 1; 778 } 779 } 780 if(values) { 781 values[valuesLen] = 0; 782 if(valLen) { 783 *valLen = valuesLen; 784 } 785 } 786 return u_terminateChars(keywords, 
keywordCapacity, keywordsLen, status); 787 } else { 788 return 0; 789 } 790} 791 792U_CFUNC int32_t 793locale_getKeywords(const char *localeID, 794 char prev, 795 char *keywords, int32_t keywordCapacity, 796 char *values, int32_t valuesCapacity, int32_t *valLen, 797 UBool valuesToo, 798 UErrorCode *status) { 799 return _getKeywords(localeID, prev, keywords, keywordCapacity, 800 values, valuesCapacity, valLen, valuesToo, 801 NULL, NULL, status); 802} 803 804U_CAPI int32_t U_EXPORT2 805uloc_getKeywordValue(const char* localeID, 806 const char* keywordName, 807 char* buffer, int32_t bufferCapacity, 808 UErrorCode* status) 809{ 810 const char* nextSeparator = NULL; 811 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 812 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 813 int32_t i = 0; 814 int32_t result = 0; 815 816 if(status && U_SUCCESS(*status) && localeID) { 817 818 const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 819 if(startSearchHere == NULL) { 820 /* no keywords, return at once */ 821 return 0; 822 } 823 824 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 825 if(U_FAILURE(*status)) { 826 return 0; 827 } 828 829 /* find the first keyword */ 830 while(startSearchHere) { 831 startSearchHere++; 832 /* skip leading spaces (allowed?) 
*/ 833 while(*startSearchHere == ' ') { 834 startSearchHere++; 835 } 836 nextSeparator = uprv_strchr(startSearchHere, '='); 837 /* need to normalize both keyword and keyword name */ 838 if(!nextSeparator) { 839 break; 840 } 841 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 842 /* keyword name too long for internal buffer */ 843 *status = U_INTERNAL_PROGRAM_ERROR; 844 return 0; 845 } 846 for(i = 0; i < nextSeparator - startSearchHere; i++) { 847 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 848 } 849 /* trim trailing spaces */ 850 while(startSearchHere[i-1] == ' ') { 851 i--; 852 } 853 localeKeywordNameBuffer[i] = 0; 854 855 startSearchHere = uprv_strchr(nextSeparator, ';'); 856 857 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 858 nextSeparator++; 859 while(*nextSeparator == ' ') { 860 nextSeparator++; 861 } 862 /* we actually found the keyword. Copy the value */ 863 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 864 while(*(startSearchHere-1) == ' ') { 865 startSearchHere--; 866 } 867 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 868 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 869 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 870 i = (int32_t)uprv_strlen(nextSeparator); 871 while(nextSeparator[i - 1] == ' ') { 872 i--; 873 } 874 uprv_strncpy(buffer, nextSeparator, i); 875 result = u_terminateChars(buffer, bufferCapacity, i, status); 876 } else { 877 /* give a bigger buffer, please */ 878 *status = U_BUFFER_OVERFLOW_ERROR; 879 if(startSearchHere) { 880 result = (int32_t)(startSearchHere - nextSeparator); 881 } else { 882 result = (int32_t)uprv_strlen(nextSeparator); 883 } 884 } 885 return result; 886 } 887 } 888 } 889 return 0; 890} 891 892U_CAPI int32_t U_EXPORT2 893uloc_setKeywordValue(const char* keywordName, 894 const char* 
keywordValue, 895 char* buffer, int32_t bufferCapacity, 896 UErrorCode* status) 897{ 898 /* TODO: sorting. removal. */ 899 int32_t keywordNameLen; 900 int32_t keywordValueLen; 901 int32_t bufLen; 902 int32_t needLen = 0; 903 int32_t foundValueLen; 904 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 905 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 906 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 907 int32_t i = 0; 908 int32_t rc; 909 char* nextSeparator = NULL; 910 char* nextEqualsign = NULL; 911 char* startSearchHere = NULL; 912 char* keywordStart = NULL; 913 char *insertHere = NULL; 914 if(U_FAILURE(*status)) { 915 return -1; 916 } 917 if(bufferCapacity>1) { 918 bufLen = (int32_t)uprv_strlen(buffer); 919 } else { 920 *status = U_ILLEGAL_ARGUMENT_ERROR; 921 return 0; 922 } 923 if(bufferCapacity<bufLen) { 924 /* The capacity is less than the length?! Is this NULL terminated? */ 925 *status = U_ILLEGAL_ARGUMENT_ERROR; 926 return 0; 927 } 928 if(keywordValue && !*keywordValue) { 929 keywordValue = NULL; 930 } 931 if(keywordValue) { 932 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 933 } else { 934 keywordValueLen = 0; 935 } 936 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 937 if(U_FAILURE(*status)) { 938 return 0; 939 } 940 startSearchHere = (char*)locale_getKeywordsStart(buffer); 941 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 942 if(!keywordValue) { /* no keywords = nothing to remove */ 943 return bufLen; 944 } 945 946 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 947 if(startSearchHere) { /* had a single @ */ 948 needLen--; /* already had the @ */ 949 /* startSearchHere points at the @ */ 950 } else { 951 startSearchHere=buffer+bufLen; 952 } 953 if(needLen >= bufferCapacity) { 954 *status = U_BUFFER_OVERFLOW_ERROR; 955 return needLen; /* no change */ 956 } 957 *startSearchHere = '@'; 958 startSearchHere++; 959 uprv_strcpy(startSearchHere, keywordNameBuffer); 960 
startSearchHere += keywordNameLen; 961 *startSearchHere = '='; 962 startSearchHere++; 963 uprv_strcpy(startSearchHere, keywordValue); 964 startSearchHere+=keywordValueLen; 965 return needLen; 966 } /* end shortcut - no @ */ 967 968 keywordStart = startSearchHere; 969 /* search for keyword */ 970 while(keywordStart) { 971 keywordStart++; 972 /* skip leading spaces (allowed?) */ 973 while(*keywordStart == ' ') { 974 keywordStart++; 975 } 976 nextEqualsign = uprv_strchr(keywordStart, '='); 977 /* need to normalize both keyword and keyword name */ 978 if(!nextEqualsign) { 979 break; 980 } 981 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 982 /* keyword name too long for internal buffer */ 983 *status = U_INTERNAL_PROGRAM_ERROR; 984 return 0; 985 } 986 for(i = 0; i < nextEqualsign - keywordStart; i++) { 987 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 988 } 989 /* trim trailing spaces */ 990 while(keywordStart[i-1] == ' ') { 991 i--; 992 } 993 localeKeywordNameBuffer[i] = 0; 994 995 nextSeparator = uprv_strchr(nextEqualsign, ';'); 996 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 997 if(rc == 0) { 998 nextEqualsign++; 999 while(*nextEqualsign == ' ') { 1000 nextEqualsign++; 1001 } 1002 /* we actually found the keyword. 
Change the value */ 1003 if (nextSeparator) { 1004 keywordAtEnd = 0; 1005 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1006 } else { 1007 keywordAtEnd = 1; 1008 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1009 } 1010 if(keywordValue) { /* adding a value - not removing */ 1011 if(foundValueLen == keywordValueLen) { 1012 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1013 return bufLen; /* no change in size */ 1014 } else if(foundValueLen > keywordValueLen) { 1015 int32_t delta = foundValueLen - keywordValueLen; 1016 if(nextSeparator) { /* RH side */ 1017 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1018 } 1019 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1020 bufLen -= delta; 1021 buffer[bufLen]=0; 1022 return bufLen; 1023 } else { /* FVL < KVL */ 1024 int32_t delta = keywordValueLen - foundValueLen; 1025 if((bufLen+delta) >= bufferCapacity) { 1026 *status = U_BUFFER_OVERFLOW_ERROR; 1027 return bufLen+delta; 1028 } 1029 if(nextSeparator) { /* RH side */ 1030 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1031 } 1032 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1033 bufLen += delta; 1034 buffer[bufLen]=0; 1035 return bufLen; 1036 } 1037 } else { /* removing a keyword */ 1038 if(keywordAtEnd) { 1039 /* zero out the ';' or '@' just before startSearchhere */ 1040 keywordStart[-1] = 0; 1041 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1042 } else { 1043 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1044 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1045 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1046 } 1047 } 1048 } else if(rc<0){ /* end match keyword */ 1049 /* could insert at this location. 
*/ 1050 insertHere = keywordStart; 1051 } 1052 keywordStart = nextSeparator; 1053 } /* end loop searching */ 1054 1055 if(!keywordValue) { 1056 return bufLen; /* removal of non-extant keyword - no change */ 1057 } 1058 1059 /* we know there is at least one keyword. */ 1060 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1061 if(needLen >= bufferCapacity) { 1062 *status = U_BUFFER_OVERFLOW_ERROR; 1063 return needLen; /* no change */ 1064 } 1065 1066 if(insertHere) { 1067 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1068 keywordStart = insertHere; 1069 } else { 1070 keywordStart = buffer+bufLen; 1071 *keywordStart = ';'; 1072 keywordStart++; 1073 } 1074 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1075 keywordStart += keywordNameLen; 1076 *keywordStart = '='; 1077 keywordStart++; 1078 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1079 keywordStart+=keywordValueLen; 1080 if(insertHere) { 1081 *keywordStart = ';'; 1082 keywordStart++; 1083 } 1084 buffer[needLen]=0; 1085 return needLen; 1086} 1087 1088/* ### ID parsing implementation **************************************************/ 1089 1090/*returns TRUE if a is an ID separator FALSE otherwise*/ 1091#define _isIDSeparator(a) (a == '_' || a == '-') 1092 1093#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1094 1095/*returns TRUE if one of the special prefixes is here (s=string) 1096 'x-' or 'i-' */ 1097#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1098 1099/* Dot terminates it because of POSIX form where dot precedes the codepage 1100 * except for variant 1101 */ 1102#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1103 1104static char* _strnchr(const char* str, int32_t len, char c) { 1105 U_ASSERT(str != 0 && len >= 0); 1106 while (len-- != 0) { 1107 char d = *str; 1108 if (d == c) { 1109 return (char*) str; 1110 } else if (d == 0) { 1111 break; 1112 } 1113 
++str; 1114 } 1115 return NULL; 1116} 1117 1118/** 1119 * Lookup 'key' in the array 'list'. The array 'list' should contain 1120 * a NULL entry, followed by more entries, and a second NULL entry. 1121 * 1122 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1123 * COUNTRIES_3. 1124 */ 1125static int16_t _findIndex(const char* const* list, const char* key) 1126{ 1127 const char* const* anchor = list; 1128 int32_t pass = 0; 1129 1130 /* Make two passes through two NULL-terminated arrays at 'list' */ 1131 while (pass++ < 2) { 1132 while (*list) { 1133 if (uprv_strcmp(key, *list) == 0) { 1134 return (int16_t)(list - anchor); 1135 } 1136 list++; 1137 } 1138 ++list; /* skip final NULL *CWB*/ 1139 } 1140 return -1; 1141} 1142 1143/* count the length of src while copying it to dest; return strlen(src) */ 1144static U_INLINE int32_t 1145_copyCount(char *dest, int32_t destCapacity, const char *src) { 1146 const char *anchor; 1147 char c; 1148 1149 anchor=src; 1150 for(;;) { 1151 if((c=*src)==0) { 1152 return (int32_t)(src-anchor); 1153 } 1154 if(destCapacity<=0) { 1155 return (int32_t)((src-anchor)+uprv_strlen(src)); 1156 } 1157 ++src; 1158 *dest++=c; 1159 --destCapacity; 1160 } 1161} 1162 1163static const char* 1164uloc_getCurrentCountryID(const char* oldID){ 1165 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1166 if (offset >= 0) { 1167 return REPLACEMENT_COUNTRIES[offset]; 1168 } 1169 return oldID; 1170} 1171static const char* 1172uloc_getCurrentLanguageID(const char* oldID){ 1173 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1174 if (offset >= 0) { 1175 return REPLACEMENT_LANGUAGES[offset]; 1176 } 1177 return oldID; 1178} 1179/* 1180 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1181 * avoid duplicating code to handle the earlier locale ID pieces 1182 * in the functions for the later ones by 1183 * setting the *pEnd pointer to where they stopped parsing 1184 * 1185 * TODO try to use this in Locale 1186 */ 
1187static int32_t 1188_getLanguage(const char *localeID, 1189 char *language, int32_t languageCapacity, 1190 const char **pEnd) { 1191 int32_t i=0; 1192 int32_t offset; 1193 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1194 1195 /* if it starts with i- or x- then copy that prefix */ 1196 if(_isIDPrefix(localeID)) { 1197 if(i<languageCapacity) { 1198 language[i]=(char)uprv_tolower(*localeID); 1199 } 1200 if(i<languageCapacity) { 1201 language[i+1]='-'; 1202 } 1203 i+=2; 1204 localeID+=2; 1205 } 1206 1207 /* copy the language as far as possible and count its length */ 1208 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1209 if(i<languageCapacity) { 1210 language[i]=(char)uprv_tolower(*localeID); 1211 } 1212 if(i<3) { 1213 lang[i]=(char)uprv_tolower(*localeID); 1214 } 1215 i++; 1216 localeID++; 1217 } 1218 1219 if(i==3) { 1220 /* convert 3 character code to 2 character code if possible *CWB*/ 1221 offset=_findIndex(LANGUAGES_3, lang); 1222 if(offset>=0) { 1223 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1224 } 1225 } 1226 1227 if(pEnd!=NULL) { 1228 *pEnd=localeID; 1229 } 1230 return i; 1231} 1232 1233static int32_t 1234_getScript(const char *localeID, 1235 char *script, int32_t scriptCapacity, 1236 const char **pEnd) 1237{ 1238 int32_t idLen = 0; 1239 1240 if (pEnd != NULL) { 1241 *pEnd = localeID; 1242 } 1243 1244 /* copy the second item as far as possible and count its length */ 1245 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1246 idLen++; 1247 } 1248 1249 /* If it's exactly 4 characters long, then it's a script and not a country. 
*/ 1250 if (idLen == 4) { 1251 int32_t i; 1252 if (pEnd != NULL) { 1253 *pEnd = localeID+idLen; 1254 } 1255 if(idLen > scriptCapacity) { 1256 idLen = scriptCapacity; 1257 } 1258 if (idLen >= 1) { 1259 script[0]=(char)uprv_toupper(*(localeID++)); 1260 } 1261 for (i = 1; i < idLen; i++) { 1262 script[i]=(char)uprv_tolower(*(localeID++)); 1263 } 1264 } 1265 else { 1266 idLen = 0; 1267 } 1268 return idLen; 1269} 1270 1271static int32_t 1272_getCountry(const char *localeID, 1273 char *country, int32_t countryCapacity, 1274 const char **pEnd) 1275{ 1276 int32_t i=0; 1277 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1278 int32_t offset; 1279 1280 /* copy the country as far as possible and count its length */ 1281 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1282 if(i<countryCapacity) { 1283 country[i]=(char)uprv_toupper(*localeID); 1284 } 1285 if(i<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1286 cnty[i]=(char)uprv_toupper(*localeID); 1287 } 1288 i++; 1289 localeID++; 1290 } 1291 1292 /* convert 3 character code to 2 character code if possible *CWB*/ 1293 if(i==3) { 1294 offset=_findIndex(COUNTRIES_3, cnty); 1295 if(offset>=0) { 1296 i=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1297 } 1298 } 1299 1300 if(pEnd!=NULL) { 1301 *pEnd=localeID; 1302 } 1303 return i; 1304} 1305 1306/** 1307 * @param needSeparator if true, then add leading '_' if any variants 1308 * are added to 'variant' 1309 */ 1310static int32_t 1311_getVariantEx(const char *localeID, 1312 char prev, 1313 char *variant, int32_t variantCapacity, 1314 UBool needSeparator) { 1315 int32_t i=0; 1316 1317 /* get one or more variant tags and separate them with '_' */ 1318 if(_isIDSeparator(prev)) { 1319 /* get a variant string after a '-' or '_' */ 1320 while(!_isTerminator(*localeID)) { 1321 if (needSeparator) { 1322 if (i<variantCapacity) { 1323 variant[i] = '_'; 1324 } 1325 ++i; 1326 needSeparator = FALSE; 1327 } 1328 if(i<variantCapacity) { 1329 
variant[i]=(char)uprv_toupper(*localeID); 1330 if(variant[i]=='-') { 1331 variant[i]='_'; 1332 } 1333 } 1334 i++; 1335 localeID++; 1336 } 1337 } 1338 1339 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1340 if(i==0) { 1341 if(prev=='@') { 1342 /* keep localeID */ 1343 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1344 ++localeID; /* point after the '@' */ 1345 } else { 1346 return 0; 1347 } 1348 while(!_isTerminator(*localeID)) { 1349 if (needSeparator) { 1350 if (i<variantCapacity) { 1351 variant[i] = '_'; 1352 } 1353 ++i; 1354 needSeparator = FALSE; 1355 } 1356 if(i<variantCapacity) { 1357 variant[i]=(char)uprv_toupper(*localeID); 1358 if(variant[i]=='-' || variant[i]==',') { 1359 variant[i]='_'; 1360 } 1361 } 1362 i++; 1363 localeID++; 1364 } 1365 } 1366 1367 return i; 1368} 1369 1370static int32_t 1371_getVariant(const char *localeID, 1372 char prev, 1373 char *variant, int32_t variantCapacity) { 1374 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1375} 1376 1377/** 1378 * Delete ALL instances of a variant from the given list of one or 1379 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1380 * @param variants the source string of one or more variants, 1381 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1382 * terminated; if it is, trailing zero will NOT be maintained. 1383 * @param variantsLen length of variants 1384 * @param toDelete variant to delete, without separators, e.g. 
"EURO" 1385 * or "PREEURO"; not zero terminated 1386 * @param toDeleteLen length of toDelete 1387 * @return number of characters deleted from variants 1388 */ 1389static int32_t 1390_deleteVariant(char* variants, int32_t variantsLen, 1391 const char* toDelete, int32_t toDeleteLen) 1392{ 1393 int32_t delta = 0; /* number of chars deleted */ 1394 for (;;) { 1395 UBool flag = FALSE; 1396 if (variantsLen < toDeleteLen) { 1397 return delta; 1398 } 1399 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1400 (variantsLen == toDeleteLen || 1401 (flag=(variants[toDeleteLen] == '_')))) 1402 { 1403 int32_t d = toDeleteLen + (flag?1:0); 1404 variantsLen -= d; 1405 delta += d; 1406 if (variantsLen > 0) { 1407 uprv_memmove(variants, variants+d, variantsLen); 1408 } 1409 } else { 1410 char* p = _strnchr(variants, variantsLen, '_'); 1411 if (p == NULL) { 1412 return delta; 1413 } 1414 ++p; 1415 variantsLen -= (int32_t)(p - variants); 1416 variants = p; 1417 } 1418 } 1419} 1420 1421/* Keyword enumeration */ 1422 1423typedef struct UKeywordsContext { 1424 char* keywords; 1425 char* current; 1426} UKeywordsContext; 1427 1428static void U_CALLCONV 1429uloc_kw_closeKeywords(UEnumeration *enumerator) { 1430 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1431 uprv_free(enumerator->context); 1432 uprv_free(enumerator); 1433} 1434 1435static int32_t U_CALLCONV 1436uloc_kw_countKeywords(UEnumeration *en, UErrorCode *status) { 1437 char *kw = ((UKeywordsContext *)en->context)->keywords; 1438 int32_t result = 0; 1439 while(*kw) { 1440 result++; 1441 kw += uprv_strlen(kw)+1; 1442 } 1443 return result; 1444} 1445 1446static const char* U_CALLCONV 1447uloc_kw_nextKeyword(UEnumeration* en, 1448 int32_t* resultLength, 1449 UErrorCode* status) { 1450 const char* result = ((UKeywordsContext *)en->context)->current; 1451 int32_t len = 0; 1452 if(*result) { 1453 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1454 ((UKeywordsContext 
*)en->context)->current += len+1; 1455 } else { 1456 result = NULL; 1457 } 1458 if (resultLength) { 1459 *resultLength = len; 1460 } 1461 return result; 1462} 1463 1464static void U_CALLCONV 1465uloc_kw_resetKeywords(UEnumeration* en, 1466 UErrorCode* status) { 1467 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1468} 1469 1470static const UEnumeration gKeywordsEnum = { 1471 NULL, 1472 NULL, 1473 uloc_kw_closeKeywords, 1474 uloc_kw_countKeywords, 1475 uenum_unextDefault, 1476 uloc_kw_nextKeyword, 1477 uloc_kw_resetKeywords 1478}; 1479 1480U_CAPI UEnumeration* U_EXPORT2 1481uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1482{ 1483 UKeywordsContext *myContext = NULL; 1484 UEnumeration *result = NULL; 1485 1486 if(U_FAILURE(*status)) { 1487 return NULL; 1488 } 1489 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1490 /* Null pointer test */ 1491 if (result == NULL) { 1492 *status = U_MEMORY_ALLOCATION_ERROR; 1493 return NULL; 1494 } 1495 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1496 myContext = uprv_malloc(sizeof(UKeywordsContext)); 1497 if (myContext == NULL) { 1498 *status = U_MEMORY_ALLOCATION_ERROR; 1499 uprv_free(result); 1500 return NULL; 1501 } 1502 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1503 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1504 myContext->keywords[keywordListSize] = 0; 1505 myContext->current = myContext->keywords; 1506 result->context = myContext; 1507 return result; 1508} 1509 1510U_CAPI UEnumeration* U_EXPORT2 1511uloc_openKeywords(const char* localeID, 1512 UErrorCode* status) 1513{ 1514 int32_t i=0; 1515 char keywords[256]; 1516 int32_t keywordsCapacity = 256; 1517 if(status==NULL || U_FAILURE(*status)) { 1518 return 0; 1519 } 1520 1521 if(localeID==NULL) { 1522 localeID=uloc_getDefault(); 1523 } 1524 1525 /* Skip the language */ 1526 _getLanguage(localeID, NULL, 0, &localeID); 1527 
if(_isIDSeparator(*localeID)) { 1528 const char *scriptID; 1529 /* Skip the script if available */ 1530 _getScript(localeID+1, NULL, 0, &scriptID); 1531 if(scriptID != localeID+1) { 1532 /* Found optional script */ 1533 localeID = scriptID; 1534 } 1535 /* Skip the Country */ 1536 if (_isIDSeparator(*localeID)) { 1537 _getCountry(localeID+1, NULL, 0, &localeID); 1538 if(_isIDSeparator(*localeID)) { 1539 _getVariant(localeID+1, *localeID, NULL, 0); 1540 } 1541 } 1542 } 1543 1544 /* keywords are located after '@' */ 1545 if((localeID = locale_getKeywordsStart(localeID)) != NULL) { 1546 i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1547 } 1548 1549 if(i) { 1550 return uloc_openKeywordList(keywords, i, status); 1551 } else { 1552 return NULL; 1553 } 1554} 1555 1556 1557/* bit-flags for 'options' parameter of _canonicalize */ 1558#define _ULOC_STRIP_KEYWORDS 0x2 1559#define _ULOC_CANONICALIZE 0x1 1560 1561#define OPTION_SET(options, mask) ((options & mask) != 0) 1562 1563static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1564#define I_DEFAULT_LENGTH (sizeof i_default / sizeof i_default[0]) 1565 1566/** 1567 * Canonicalize the given localeID, to level 1 or to level 2, 1568 * depending on the options. To specify level 1, pass in options=0. 1569 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1570 * 1571 * This is the code underlying uloc_getName and uloc_canonicalize. 
1572 */ 1573static int32_t 1574_canonicalize(const char* localeID, 1575 char* result, 1576 int32_t resultCapacity, 1577 uint32_t options, 1578 UErrorCode* err) { 1579 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1580 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1581 const char* origLocaleID; 1582 const char* keywordAssign = NULL; 1583 const char* separatorIndicator = NULL; 1584 const char* addKeyword = NULL; 1585 const char* addValue = NULL; 1586 char* name; 1587 char* variant = NULL; /* pointer into name, or NULL */ 1588 1589 if (U_FAILURE(*err)) { 1590 return 0; 1591 } 1592 1593 if (localeID==NULL) { 1594 localeID=uloc_getDefault(); 1595 } 1596 origLocaleID=localeID; 1597 1598 /* if we are doing a full canonicalization, then put results in 1599 localeBuffer, if necessary; otherwise send them to result. */ 1600 if (OPTION_SET(options, _ULOC_CANONICALIZE) && 1601 (result == NULL || resultCapacity < sizeof(localeBuffer))) { 1602 name = localeBuffer; 1603 nameCapacity = sizeof(localeBuffer); 1604 } else { 1605 name = result; 1606 nameCapacity = resultCapacity; 1607 } 1608 1609 /* get all pieces, one after another, and separate with '_' */ 1610 len=_getLanguage(localeID, name, nameCapacity, &localeID); 1611 1612 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1613 const char *d = uloc_getDefault(); 1614 1615 len = uprv_strlen(d); 1616 1617 if (name != NULL) { 1618 uprv_strncpy(name, d, len); 1619 } 1620 } else if(_isIDSeparator(*localeID)) { 1621 const char *scriptID; 1622 1623 ++fieldCount; 1624 if(len<nameCapacity) { 1625 name[len]='_'; 1626 } 1627 ++len; 1628 1629 scriptSize=_getScript(localeID+1, name+len, nameCapacity-len, &scriptID); 1630 if(scriptSize > 0) { 1631 /* Found optional script */ 1632 localeID = scriptID; 1633 ++fieldCount; 1634 len+=scriptSize; 1635 if (_isIDSeparator(*localeID)) { 1636 /* If there is something else, then we add the _ */ 1637 if(len<nameCapacity) { 1638 name[len]='_'; 1639 } 
1640 ++len; 1641 } 1642 } 1643 1644 if (_isIDSeparator(*localeID)) { 1645 len+=_getCountry(localeID+1, name+len, nameCapacity-len, &localeID); 1646 if(_isIDSeparator(*localeID)) { 1647 ++fieldCount; 1648 if(len<nameCapacity) { 1649 name[len]='_'; 1650 } 1651 ++len; 1652 variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len); 1653 if (variantSize > 0) { 1654 variant = name+len; 1655 len += variantSize; 1656 localeID += variantSize + 1; /* skip '_' and variant */ 1657 } 1658 } 1659 } 1660 } 1661 1662 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1663 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') { 1664 UBool done = FALSE; 1665 do { 1666 char c = *localeID; 1667 switch (c) { 1668 case 0: 1669 case '@': 1670 done = TRUE; 1671 break; 1672 default: 1673 if (len<nameCapacity) { 1674 name[len] = c; 1675 } 1676 ++len; 1677 ++localeID; 1678 break; 1679 } 1680 } while (!done); 1681 } 1682 1683 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1684 After this, localeID either points to '@' or is NULL */ 1685 if ((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1686 keywordAssign = uprv_strchr(localeID, '='); 1687 separatorIndicator = uprv_strchr(localeID, ';'); 1688 } 1689 1690 /* Copy POSIX-style variant, if any [mr@FOO] */ 1691 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1692 localeID != NULL && keywordAssign == NULL) { 1693 for (;;) { 1694 char c = *localeID; 1695 if (c == 0) { 1696 break; 1697 } 1698 if (len<nameCapacity) { 1699 name[len] = c; 1700 } 1701 ++len; 1702 ++localeID; 1703 } 1704 } 1705 1706 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1707 /* Handle @FOO variant if @ is present and not followed by = */ 1708 if (localeID!=NULL && keywordAssign==NULL) { 1709 int32_t posixVariantSize; 1710 /* Add missing '_' if needed */ 1711 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1712 do { 1713 if(len<nameCapacity) { 1714 name[len]='_'; 1715 } 1716 ++len; 1717 
++fieldCount; 1718 } while(fieldCount<2); 1719 } 1720 posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len, 1721 (UBool)(variantSize > 0)); 1722 if (posixVariantSize > 0) { 1723 if (variant == NULL) { 1724 variant = name+len; 1725 } 1726 len += posixVariantSize; 1727 variantSize += posixVariantSize; 1728 } 1729 } 1730 1731 /* Handle generic variants first */ 1732 if (variant) { 1733 for (j=0; j<(int32_t)(sizeof(VARIANT_MAP)/sizeof(VARIANT_MAP[0])); j++) { 1734 const char* variantToCompare = VARIANT_MAP[j].variant; 1735 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1736 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1737 len -= variantLen; 1738 if (variantLen > 0) { 1739 if (name[len-1] == '_') { /* delete trailing '_' */ 1740 --len; 1741 } 1742 addKeyword = VARIANT_MAP[j].keyword; 1743 addValue = VARIANT_MAP[j].value; 1744 break; 1745 } 1746 } 1747 if (name[len-1] == '_') { /* delete trailing '_' */ 1748 --len; 1749 } 1750 } 1751 1752 /* Look up the ID in the canonicalization map */ 1753 for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) { 1754 const char* id = CANONICALIZE_MAP[j].id; 1755 int32_t n = (int32_t)uprv_strlen(id); 1756 if (len == n && uprv_strncmp(name, id, n) == 0) { 1757 if (n == 0 && localeID != NULL) { 1758 break; /* Don't remap "" if keywords present */ 1759 } 1760 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1761 if (CANONICALIZE_MAP[j].keyword) { 1762 addKeyword = CANONICALIZE_MAP[j].keyword; 1763 addValue = CANONICALIZE_MAP[j].value; 1764 } 1765 break; 1766 } 1767 } 1768 } 1769 1770 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1771 if (localeID!=NULL && keywordAssign!=NULL && 1772 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1773 if(len<nameCapacity) { 1774 name[len]='@'; 1775 } 1776 ++len; 1777 ++fieldCount; 1778 len += _getKeywords(localeID+1, '@', name+len, 
nameCapacity-len, NULL, 0, NULL, TRUE, 1779 addKeyword, addValue, err); 1780 } else if (addKeyword != NULL) { 1781 U_ASSERT(addValue != NULL); 1782 /* inelegant but works -- later make _getKeywords do this? */ 1783 len += _copyCount(name+len, nameCapacity-len, "@"); 1784 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1785 len += _copyCount(name+len, nameCapacity-len, "="); 1786 len += _copyCount(name+len, nameCapacity-len, addValue); 1787 } 1788 } 1789 1790 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1791 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1792 } 1793 1794 return u_terminateChars(result, resultCapacity, len, err); 1795} 1796 1797/* ### ID parsing API **************************************************/ 1798 1799U_CAPI int32_t U_EXPORT2 1800uloc_getParent(const char* localeID, 1801 char* parent, 1802 int32_t parentCapacity, 1803 UErrorCode* err) 1804{ 1805 const char *lastUnderscore; 1806 int32_t i; 1807 1808 if (U_FAILURE(*err)) 1809 return 0; 1810 1811 if (localeID == NULL) 1812 localeID = uloc_getDefault(); 1813 1814 lastUnderscore=uprv_strrchr(localeID, '_'); 1815 if(lastUnderscore!=NULL) { 1816 i=(int32_t)(lastUnderscore-localeID); 1817 } else { 1818 i=0; 1819 } 1820 1821 if(i>0 && parent != localeID) { 1822 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1823 } 1824 return u_terminateChars(parent, parentCapacity, i, err); 1825} 1826 1827U_CAPI int32_t U_EXPORT2 1828uloc_getLanguage(const char* localeID, 1829 char* language, 1830 int32_t languageCapacity, 1831 UErrorCode* err) 1832{ 1833 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. 
*CWB*/ 1834 int32_t i=0; 1835 1836 if (err==NULL || U_FAILURE(*err)) { 1837 return 0; 1838 } 1839 1840 if(localeID==NULL) { 1841 localeID=uloc_getDefault(); 1842 } 1843 1844 i=_getLanguage(localeID, language, languageCapacity, NULL); 1845 return u_terminateChars(language, languageCapacity, i, err); 1846} 1847 1848U_CAPI int32_t U_EXPORT2 1849uloc_getScript(const char* localeID, 1850 char* script, 1851 int32_t scriptCapacity, 1852 UErrorCode* err) 1853{ 1854 int32_t i=0; 1855 1856 if(err==NULL || U_FAILURE(*err)) { 1857 return 0; 1858 } 1859 1860 if(localeID==NULL) { 1861 localeID=uloc_getDefault(); 1862 } 1863 1864 /* skip the language */ 1865 _getLanguage(localeID, NULL, 0, &localeID); 1866 if(_isIDSeparator(*localeID)) { 1867 i=_getScript(localeID+1, script, scriptCapacity, NULL); 1868 } 1869 return u_terminateChars(script, scriptCapacity, i, err); 1870} 1871 1872U_CAPI int32_t U_EXPORT2 1873uloc_getCountry(const char* localeID, 1874 char* country, 1875 int32_t countryCapacity, 1876 UErrorCode* err) 1877{ 1878 int32_t i=0; 1879 1880 if(err==NULL || U_FAILURE(*err)) { 1881 return 0; 1882 } 1883 1884 if(localeID==NULL) { 1885 localeID=uloc_getDefault(); 1886 } 1887 1888 /* Skip the language */ 1889 _getLanguage(localeID, NULL, 0, &localeID); 1890 if(_isIDSeparator(*localeID)) { 1891 const char *scriptID; 1892 /* Skip the script if available */ 1893 _getScript(localeID+1, NULL, 0, &scriptID); 1894 if(scriptID != localeID+1) { 1895 /* Found optional script */ 1896 localeID = scriptID; 1897 } 1898 if(_isIDSeparator(*localeID)) { 1899 i=_getCountry(localeID+1, country, countryCapacity, NULL); 1900 } 1901 } 1902 return u_terminateChars(country, countryCapacity, i, err); 1903} 1904 1905U_CAPI int32_t U_EXPORT2 1906uloc_getVariant(const char* localeID, 1907 char* variant, 1908 int32_t variantCapacity, 1909 UErrorCode* err) 1910{ 1911 int32_t i=0; 1912 1913 if(err==NULL || U_FAILURE(*err)) { 1914 return 0; 1915 } 1916 1917 if(localeID==NULL) { 1918 
/**
 * Public entry point for full canonicalization of a locale ID.
 *
 * Forwards every argument unchanged to _canonicalize() with the
 * _ULOC_CANONICALIZE option set and returns that function's result.
 * (uloc_getName() and uloc_getBaseName() call the same worker with
 * options 0 and _ULOC_STRIP_KEYWORDS respectively.)
 *
 * @param localeID     locale ID to canonicalize.
 * @param name         output buffer.
 * @param nameCapacity capacity of name, in chars.
 * @param err          in/out error code, handed straight to _canonicalize().
 * @return whatever _canonicalize() returns.
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
         char* name,
         int32_t nameCapacity,
         UErrorCode* err)
{
    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
}
uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 1989 if (U_FAILURE(err)) 1990 return ""; 1991 offset = _findIndex(LANGUAGES, lang); 1992 if (offset < 0) 1993 return ""; 1994 return LANGUAGES_3[offset]; 1995} 1996 1997U_CAPI const char* U_EXPORT2 1998uloc_getISO3Country(const char* localeID) 1999{ 2000 int16_t offset; 2001 char cntry[ULOC_LANG_CAPACITY]; 2002 UErrorCode err = U_ZERO_ERROR; 2003 2004 if (localeID == NULL) 2005 { 2006 localeID = uloc_getDefault(); 2007 } 2008 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2009 if (U_FAILURE(err)) 2010 return ""; 2011 offset = _findIndex(COUNTRIES, cntry); 2012 if (offset < 0) 2013 return ""; 2014 2015 return COUNTRIES_3[offset]; 2016} 2017 2018U_CAPI uint32_t U_EXPORT2 2019uloc_getLCID(const char* localeID) 2020{ 2021 UErrorCode status = U_ZERO_ERROR; 2022 char langID[ULOC_FULLNAME_CAPACITY]; 2023 2024 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2025 if (U_FAILURE(status)) { 2026 return 0; 2027 } 2028 2029 return uprv_convertToLCID(langID, localeID, &status); 2030} 2031 2032U_CAPI int32_t U_EXPORT2 2033uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2034 UErrorCode *status) 2035{ 2036 int32_t length; 2037 const char *posix = uprv_convertToPosix(hostid, status); 2038 if (U_FAILURE(*status) || posix == NULL) { 2039 return 0; 2040 } 2041 length = (int32_t)uprv_strlen(posix); 2042 if (length+1 > localeCapacity) { 2043 *status = U_BUFFER_OVERFLOW_ERROR; 2044 } 2045 else { 2046 uprv_strcpy(locale, posix); 2047 } 2048 return length; 2049} 2050 2051/* ### Default locale **************************************************/ 2052 2053U_CAPI const char* U_EXPORT2 2054uloc_getDefault() 2055{ 2056 return locale_get_default(); 2057} 2058 2059U_CAPI void U_EXPORT2 2060uloc_setDefault(const char* newDefaultLocale, 2061 UErrorCode* err) 2062{ 2063 if (U_FAILURE(*err)) 2064 return; 2065 /* the error code isn't currently used for anything by this function*/ 2066 2067 
/* propagate change to C++ */ 2068 locale_set_default(newDefaultLocale); 2069} 2070 2071/* ### Display name **************************************************/ 2072 2073/* 2074 * Lookup a resource bundle table item with fallback on the table level. 2075 * Regular resource bundle lookups perform fallback to parent locale bundles 2076 * and eventually the root bundle, but only for top-level items. 2077 * This function takes the name of a top-level table and of an item in that table 2078 * and performs a lookup of both, falling back until a bundle contains a table 2079 * with this item. 2080 * 2081 * Note: Only the opening of entire bundles falls back through the default locale 2082 * before root. Once a bundle is open, item lookups do not go through the 2083 * default locale because that would result in a mix of languages that is 2084 * unpredictable to the programmer and most likely useless. 2085 */ 2086static const UChar * 2087_res_getTableStringWithFallback(const char *path, const char *locale, 2088 const char *tableKey, const char *subTableKey, 2089 const char *itemKey, 2090 int32_t *pLength, 2091 UErrorCode *pErrorCode) 2092{ 2093/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/ 2094 UResourceBundle *rb=NULL, table, subTable; 2095 const UChar *item=NULL; 2096 UErrorCode errorCode; 2097 char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0}; 2098 2099 /* 2100 * open the bundle for the current locale 2101 * this falls back through the locale's chain to root 2102 */ 2103 errorCode=U_ZERO_ERROR; 2104 rb=ures_open(path, locale, &errorCode); 2105 if(U_FAILURE(errorCode)) { 2106 /* total failure, not even root could be opened */ 2107 *pErrorCode=errorCode; 2108 return NULL; 2109 } else if(errorCode==U_USING_DEFAULT_WARNING || 2110 (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING) 2111 ) { 2112 /* set the "strongest" error code (success->fallback->default->failure) */ 2113 *pErrorCode=errorCode; 2114 } 2115 2116 for(;;){ 2117 
ures_initStackObject(&table); 2118 ures_initStackObject(&subTable); 2119 ures_getByKeyWithFallback(rb, tableKey, &table, &errorCode); 2120 if (subTableKey != NULL) { 2121 /* 2122 ures_getByKeyWithFallback(&table,subTableKey, &subTable, &errorCode); 2123 item = ures_getStringByKeyWithFallback(&subTable, itemKey, pLength, &errorCode); 2124 if(U_FAILURE(errorCode)){ 2125 *pErrorCode = errorCode; 2126 } 2127 2128 break;*/ 2129 2130 ures_getByKeyWithFallback(&table,subTableKey, &table, &errorCode); 2131 } 2132 if(U_SUCCESS(errorCode)){ 2133 item = ures_getStringByKeyWithFallback(&table, itemKey, pLength, &errorCode); 2134 if(U_FAILURE(errorCode)){ 2135 const char* replacement = NULL; 2136 *pErrorCode = errorCode; /*save the errorCode*/ 2137 errorCode = U_ZERO_ERROR; 2138 /* may be a deprecated code */ 2139 if(uprv_strcmp(tableKey, "Countries")==0){ 2140 replacement = uloc_getCurrentCountryID(itemKey); 2141 }else if(uprv_strcmp(tableKey, "Languages")==0){ 2142 replacement = uloc_getCurrentLanguageID(itemKey); 2143 } 2144 /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/ 2145 if(replacement!=NULL && itemKey != replacement){ 2146 item = ures_getStringByKeyWithFallback(&table, replacement, pLength, &errorCode); 2147 if(U_SUCCESS(errorCode)){ 2148 *pErrorCode = errorCode; 2149 break; 2150 } 2151 } 2152 }else{ 2153 break; 2154 } 2155 } 2156 2157 if(U_FAILURE(errorCode)){ 2158 2159 /* still can't figure out ?.. 
try the fallback mechanism */ 2160 int32_t len = 0; 2161 const UChar* fallbackLocale = NULL; 2162 *pErrorCode = errorCode; 2163 errorCode = U_ZERO_ERROR; 2164 2165 fallbackLocale = ures_getStringByKeyWithFallback(&table, "Fallback", &len, &errorCode); 2166 if(U_FAILURE(errorCode)){ 2167 *pErrorCode = errorCode; 2168 break; 2169 } 2170 2171 u_UCharsToChars(fallbackLocale, explicitFallbackName, len); 2172 2173 /* guard against recursive fallback */ 2174 if(uprv_strcmp(explicitFallbackName, locale)==0){ 2175 *pErrorCode = U_INTERNAL_PROGRAM_ERROR; 2176 break; 2177 } 2178 ures_close(rb); 2179 rb = ures_open(NULL, explicitFallbackName, &errorCode); 2180 if(U_FAILURE(errorCode)){ 2181 *pErrorCode = errorCode; 2182 break; 2183 } 2184 /* succeeded in opening the fallback bundle .. continue and try to fetch the item */ 2185 }else{ 2186 break; 2187 } 2188 } 2189 /* done with the locale string - ready to close table and rb */ 2190 ures_close(&subTable); 2191 ures_close(&table); 2192 ures_close(rb); 2193 return item; 2194} 2195 2196static int32_t 2197_getStringOrCopyKey(const char *path, const char *locale, 2198 const char *tableKey, 2199 const char* subTableKey, 2200 const char *itemKey, 2201 const char *substitute, 2202 UChar *dest, int32_t destCapacity, 2203 UErrorCode *pErrorCode) { 2204 const UChar *s = NULL; 2205 int32_t length = 0; 2206 2207 if(itemKey==NULL) { 2208 /* top-level item: normal resource bundle access */ 2209 UResourceBundle *rb; 2210 2211 rb=ures_open(path, locale, pErrorCode); 2212 if(U_SUCCESS(*pErrorCode)) { 2213 s=ures_getStringByKey(rb, tableKey, &length, pErrorCode); 2214 /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */ 2215 ures_close(rb); 2216 } 2217 } else { 2218 /* Language code should not be a number. If it is, set the error code. 
*/ 2219 if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) { 2220 *pErrorCode = U_MISSING_RESOURCE_ERROR; 2221 } else { 2222 /* second-level item, use special fallback */ 2223 s=_res_getTableStringWithFallback(path, locale, 2224 tableKey, 2225 subTableKey, 2226 itemKey, 2227 &length, 2228 pErrorCode); 2229 } 2230 } 2231 if(U_SUCCESS(*pErrorCode)) { 2232 int32_t copyLength=uprv_min(length, destCapacity); 2233 if(copyLength>0 && s != NULL) { 2234 u_memcpy(dest, s, copyLength); 2235 } 2236 } else { 2237 /* no string from a resource bundle: convert the substitute */ 2238 length=(int32_t)uprv_strlen(substitute); 2239 u_charsToUChars(substitute, dest, uprv_min(length, destCapacity)); 2240 *pErrorCode=U_USING_DEFAULT_WARNING; 2241 } 2242 2243 return u_terminateUChars(dest, destCapacity, length, pErrorCode); 2244} 2245 2246static int32_t 2247_getDisplayNameForComponent(const char *locale, 2248 const char *displayLocale, 2249 UChar *dest, int32_t destCapacity, 2250 int32_t (*getter)(const char *, char *, int32_t, UErrorCode *), 2251 const char *tag, 2252 UErrorCode *pErrorCode) { 2253 char localeBuffer[ULOC_FULLNAME_CAPACITY*4]; 2254 int32_t length; 2255 UErrorCode localStatus; 2256 2257 /* argument checking */ 2258 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 2259 return 0; 2260 } 2261 2262 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 2263 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2264 return 0; 2265 } 2266 2267 localStatus = U_ZERO_ERROR; 2268 length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus); 2269 if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) { 2270 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2271 return 0; 2272 } 2273 if(length==0) { 2274 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 2275 } 2276 2277 return _getStringOrCopyKey(NULL, displayLocale, 2278 tag, NULL, localeBuffer, 2279 localeBuffer, 2280 dest, destCapacity, 2281 pErrorCode); 2282} 2283 2284U_CAPI int32_t 
/**
 * Gets the display name of the script subtag of a locale.
 *
 * Forwards to _getDisplayNameForComponent() with uloc_getScript as the
 * component getter and _kScripts ("Scripts") as the resource table tag,
 * and returns that function's result unchanged.
 *
 * @param locale        locale whose script is to be named.
 * @param displayLocale locale in which the name should be expressed.
 * @param dest          output buffer for the display name.
 * @param destCapacity  capacity of dest, in UChars.
 * @param pErrorCode    in/out error code, handed to the worker.
 * @return whatever _getDisplayNameForComponent() returns.
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayScript(const char* locale,
                      const char* displayLocale,
                      UChar *dest, int32_t destCapacity,
                      UErrorCode *pErrorCode)
{
    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                uloc_getScript, _kScripts, pErrorCode);
}
/*
 * Gets the display name of the variant part of a locale.
 *
 * Forwards to _getDisplayNameForComponent() with uloc_getVariant as the
 * component getter and _kVariants ("Variants") as the resource table tag,
 * so the whole variant string is looked up as a single key.
 *
 * TODO separate variant1_variant2_variant3...
 * by getting each tag's display string and concatenating them with ", "
 * in between - similar to uloc_getDisplayName()
 */
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char *locale,
                       const char *displayLocale,
                       UChar *dest, int32_t destCapacity,
                       UErrorCode *pErrorCode) {
    return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
                uloc_getVariant, _kVariants, pErrorCode);
}
2384 } else { 2385 length2=uloc_getDisplayScript(locale, displayLocale, 2386 NULL, 0, 2387 pErrorCode); 2388 } 2389 hasScript= length2>0; 2390 length+=length2; 2391 2392 if(hasScript) { 2393 /* append ", " */ 2394 if(length<destCapacity) { 2395 dest[length]=0x2c; 2396 } 2397 ++length; 2398 if(length<destCapacity) { 2399 dest[length]=0x20; 2400 } 2401 ++length; 2402 } 2403 2404 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2405 /* keep preflighting */ 2406 *pErrorCode=U_ZERO_ERROR; 2407 } 2408 2409 /* append the country */ 2410 if(length<destCapacity) { 2411 length2=uloc_getDisplayCountry(locale, displayLocale, 2412 dest+length, destCapacity-length, 2413 pErrorCode); 2414 } else { 2415 length2=uloc_getDisplayCountry(locale, displayLocale, 2416 NULL, 0, 2417 pErrorCode); 2418 } 2419 hasCountry= length2>0; 2420 length+=length2; 2421 2422 if(hasCountry) { 2423 /* append ", " */ 2424 if(length<destCapacity) { 2425 dest[length]=0x2c; 2426 } 2427 ++length; 2428 if(length<destCapacity) { 2429 dest[length]=0x20; 2430 } 2431 ++length; 2432 } 2433 2434 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2435 /* keep preflighting */ 2436 *pErrorCode=U_ZERO_ERROR; 2437 } 2438 2439 /* append the variant */ 2440 if(length<destCapacity) { 2441 length2=uloc_getDisplayVariant(locale, displayLocale, 2442 dest+length, destCapacity-length, 2443 pErrorCode); 2444 } else { 2445 length2=uloc_getDisplayVariant(locale, displayLocale, 2446 NULL, 0, 2447 pErrorCode); 2448 } 2449 hasVariant= length2>0; 2450 length+=length2; 2451 2452 if(hasVariant) { 2453 /* append ", " */ 2454 if(length<destCapacity) { 2455 dest[length]=0x2c; 2456 } 2457 ++length; 2458 if(length<destCapacity) { 2459 dest[length]=0x20; 2460 } 2461 ++length; 2462 } 2463 2464 keywordEnum = uloc_openKeywords(locale, pErrorCode); 2465 2466 for(keywordCount = uenum_count(keywordEnum, pErrorCode); keywordCount > 0 ; keywordCount--){ 2467 if(U_FAILURE(*pErrorCode)){ 2468 break; 2469 } 2470 /* the uenum_next returns NUL terminated string */ 2471 
keyword = uenum_next(keywordEnum, &keywordLen, pErrorCode); 2472 if(length + length3 < destCapacity) { 2473 length3 += uloc_getDisplayKeyword(keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode); 2474 } else { 2475 length3 += uloc_getDisplayKeyword(keyword, displayLocale, NULL, 0, pErrorCode); 2476 } 2477 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2478 /* keep preflighting */ 2479 *pErrorCode=U_ZERO_ERROR; 2480 } 2481 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, 256, pErrorCode); 2482 if(keywordValueLen) { 2483 if(length + length3 < destCapacity) { 2484 dest[length + length3] = 0x3D; 2485 } 2486 length3++; 2487 if(length + length3 < destCapacity) { 2488 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, dest+length+length3, destCapacity-length-length3, pErrorCode); 2489 } else { 2490 length3 += uloc_getDisplayKeywordValue(locale, keyword, displayLocale, NULL, 0, pErrorCode); 2491 } 2492 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2493 /* keep preflighting */ 2494 *pErrorCode=U_ZERO_ERROR; 2495 } 2496 } 2497 if(keywordCount > 1) { 2498 if(length + length3 + 1 < destCapacity && keywordCount) { 2499 dest[length + length3]=0x2c; 2500 dest[length + length3+1]=0x20; 2501 } 2502 length3++; /* ',' */ 2503 length3++; /* ' ' */ 2504 } 2505 } 2506 uenum_close(keywordEnum); 2507 2508 hasKeywords = length3 > 0; 2509 length += length3; 2510 2511 2512 2513 if ((hasScript && !hasCountry) 2514 || ((hasScript || hasCountry) && !hasVariant && !hasKeywords) 2515 || ((hasScript || hasCountry || hasVariant) && !hasKeywords) 2516 || (hasLanguage && !hasScript && !hasCountry && !hasVariant && !hasKeywords)) 2517 { 2518 /* remove ", " or " (" */ 2519 length-=2; 2520 } 2521 2522 if (hasLanguage && (hasScript || hasCountry || hasVariant || hasKeywords)) { 2523 /* append ")" */ 2524 if(length<destCapacity) { 2525 dest[length]=0x29; 2526 } 2527 ++length; 2528 } 2529 2530 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { 2531 
/* keep preflighting */ 2532 *pErrorCode=U_ZERO_ERROR; 2533 } 2534 2535 return u_terminateUChars(dest, destCapacity, length, pErrorCode); 2536} 2537 2538U_CAPI int32_t U_EXPORT2 2539uloc_getDisplayKeyword(const char* keyword, 2540 const char* displayLocale, 2541 UChar* dest, 2542 int32_t destCapacity, 2543 UErrorCode* status){ 2544 2545 /* argument checking */ 2546 if(status==NULL || U_FAILURE(*status)) { 2547 return 0; 2548 } 2549 2550 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 2551 *status=U_ILLEGAL_ARGUMENT_ERROR; 2552 return 0; 2553 } 2554 2555 2556 /* pass itemKey=NULL to look for a top-level item */ 2557 return _getStringOrCopyKey(NULL, displayLocale, 2558 _kKeys, NULL, 2559 keyword, 2560 keyword, 2561 dest, destCapacity, 2562 status); 2563 2564} 2565 2566 2567#define UCURRENCY_DISPLAY_NAME_INDEX 1 2568 2569U_CAPI int32_t U_EXPORT2 2570uloc_getDisplayKeywordValue( const char* locale, 2571 const char* keyword, 2572 const char* displayLocale, 2573 UChar* dest, 2574 int32_t destCapacity, 2575 UErrorCode* status){ 2576 2577 2578 char keywordValue[ULOC_FULLNAME_CAPACITY*4]; 2579 int32_t capacity = ULOC_FULLNAME_CAPACITY*4; 2580 int32_t keywordValueLen =0; 2581 2582 /* argument checking */ 2583 if(status==NULL || U_FAILURE(*status)) { 2584 return 0; 2585 } 2586 2587 if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 2588 *status=U_ILLEGAL_ARGUMENT_ERROR; 2589 return 0; 2590 } 2591 2592 /* get the keyword value */ 2593 keywordValue[0]=0; 2594 keywordValueLen = uloc_getKeywordValue(locale, keyword, keywordValue, capacity, status); 2595 2596 /* 2597 * if the keyword is equal to currency .. 
then to get the display name 2598 * we need to do the fallback ourselves 2599 */ 2600 if(uprv_stricmp(keyword, _kCurrency)==0){ 2601 2602 int32_t dispNameLen = 0; 2603 const UChar *dispName = NULL; 2604 2605 UResourceBundle *bundle = ures_open(NULL, displayLocale, status); 2606 UResourceBundle *currencies = ures_getByKey(bundle, _kCurrencies, NULL, status); 2607 UResourceBundle *currency = ures_getByKeyWithFallback(currencies, keywordValue, NULL, status); 2608 2609 dispName = ures_getStringByIndex(currency, UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status); 2610 2611 /*close the bundles */ 2612 ures_close(currency); 2613 ures_close(currencies); 2614 ures_close(bundle); 2615 2616 if(U_FAILURE(*status)){ 2617 if(*status == U_MISSING_RESOURCE_ERROR){ 2618 /* we just want to write the value over if nothing is available */ 2619 *status = U_USING_DEFAULT_WARNING; 2620 }else{ 2621 return 0; 2622 } 2623 } 2624 2625 /* now copy the dispName over if not NULL */ 2626 if(dispName != NULL){ 2627 if(dispNameLen <= destCapacity){ 2628 uprv_memcpy(dest, dispName, dispNameLen * U_SIZEOF_UCHAR); 2629 return u_terminateUChars(dest, destCapacity, dispNameLen, status); 2630 }else{ 2631 *status = U_BUFFER_OVERFLOW_ERROR; 2632 return dispNameLen; 2633 } 2634 }else{ 2635 /* we have not found the display name for the value .. 
just copy over */ 2636 if(keywordValueLen <= destCapacity){ 2637 u_charsToUChars(keywordValue, dest, keywordValueLen); 2638 return u_terminateUChars(dest, destCapacity, keywordValueLen, status); 2639 }else{ 2640 *status = U_BUFFER_OVERFLOW_ERROR; 2641 return keywordValueLen; 2642 } 2643 } 2644 2645 2646 }else{ 2647 2648 return _getStringOrCopyKey(NULL, displayLocale, 2649 _kTypes, keyword, 2650 keywordValue, 2651 keywordValue, 2652 dest, destCapacity, 2653 status); 2654 } 2655} 2656 2657/* ### Get available **************************************************/ 2658 2659static UBool U_CALLCONV uloc_cleanup(void) { 2660 char ** temp; 2661 2662 if (_installedLocales) { 2663 temp = _installedLocales; 2664 _installedLocales = NULL; 2665 2666 _installedLocalesCount = 0; 2667 2668 uprv_free(temp); 2669 } 2670 return TRUE; 2671} 2672 2673static void _load_installedLocales() 2674{ 2675 UBool localesLoaded; 2676 2677 UMTX_CHECK(NULL, _installedLocales != NULL, localesLoaded); 2678 2679 if (localesLoaded == FALSE) { 2680 UResourceBundle *indexLocale = NULL; 2681 UResourceBundle installed; 2682 UErrorCode status = U_ZERO_ERROR; 2683 char ** temp; 2684 int32_t i = 0; 2685 int32_t localeCount; 2686 2687 ures_initStackObject(&installed); 2688 indexLocale = ures_openDirect(NULL, _kIndexLocaleName, &status); 2689 ures_getByKey(indexLocale, _kIndexTag, &installed, &status); 2690 2691 if(U_SUCCESS(status)) { 2692 localeCount = ures_getSize(&installed); 2693 temp = (char **) uprv_malloc(sizeof(char*) * (localeCount+1)); 2694 /* Check for null pointer */ 2695 if (temp != NULL) { 2696 ures_resetIterator(&installed); 2697 while(ures_hasNext(&installed)) { 2698 ures_getNextString(&installed, NULL, (const char **)&temp[i++], &status); 2699 } 2700 temp[i] = NULL; 2701 2702 umtx_lock(NULL); 2703 if (_installedLocales == NULL) 2704 { 2705 _installedLocalesCount = localeCount; 2706 _installedLocales = temp; 2707 temp = NULL; 2708 ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup); 2709 
} 2710 umtx_unlock(NULL); 2711 2712 uprv_free(temp); 2713 } 2714 } 2715 ures_close(&installed); 2716 ures_close(indexLocale); 2717 } 2718} 2719 2720U_CAPI const char* U_EXPORT2 2721uloc_getAvailable(int32_t offset) 2722{ 2723 2724 _load_installedLocales(); 2725 2726 if (offset > _installedLocalesCount) 2727 return NULL; 2728 return _installedLocales[offset]; 2729} 2730 2731U_CAPI int32_t U_EXPORT2 2732uloc_countAvailable() 2733{ 2734 _load_installedLocales(); 2735 return _installedLocalesCount; 2736} 2737 2738/** 2739 * Returns a list of all language codes defined in ISO 639. This is a pointer 2740 * to an array of pointers to arrays of char. All of these pointers are owned 2741 * by ICU-- do not delete them, and do not write through them. The array is 2742 * terminated with a null pointer. 2743 */ 2744U_CAPI const char* const* U_EXPORT2 2745uloc_getISOLanguages() 2746{ 2747 return LANGUAGES; 2748} 2749 2750/** 2751 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2752 * pointer to an array of pointers to arrays of char. All of these pointers are 2753 * owned by ICU-- do not delete them, and do not write through them. The array is 2754 * terminated with a null pointer. 2755 */ 2756U_CAPI const char* const* U_EXPORT2 2757uloc_getISOCountries() 2758{ 2759 return COUNTRIES; 2760} 2761 2762 2763/* this function to be moved into cstring.c later */ 2764static char gDecimal = 0; 2765 2766static /* U_CAPI */ 2767double 2768/* U_EXPORT2 */ 2769_uloc_strtod(const char *start, char **end) { 2770 char *decimal; 2771 char *myEnd; 2772 char buf[30]; 2773 double rv; 2774 if (!gDecimal) { 2775 char rep[5]; 2776 /* For machines that decide to change the decimal on you, 2777 and try to be too smart with localization. 2778 This normally should be just a '.'. 
*/ 2779 sprintf(rep, "%+1.1f", 1.0); 2780 gDecimal = rep[2]; 2781 } 2782 2783 if(gDecimal == '.') { 2784 return uprv_strtod(start, end); /* fall through to OS */ 2785 } else { 2786 uprv_strncpy(buf, start, 29); 2787 buf[29]=0; 2788 decimal = uprv_strchr(buf, '.'); 2789 if(decimal) { 2790 *decimal = gDecimal; 2791 } else { 2792 return uprv_strtod(start, end); /* no decimal point */ 2793 } 2794 rv = uprv_strtod(buf, &myEnd); 2795 if(end) { 2796 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2797 } 2798 return rv; 2799 } 2800} 2801 2802typedef struct { 2803 float q; 2804 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2805 char *locale; 2806} _acceptLangItem; 2807 2808static int32_t U_CALLCONV 2809uloc_acceptLanguageCompare(const void *context, const void *a, const void *b) 2810{ 2811 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2812 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2813 2814 int32_t rc = 0; 2815 if(bb->q < aa->q) { 2816 rc = -1; /* A > B */ 2817 } else if(bb->q > aa->q) { 2818 rc = 1; /* A < B */ 2819 } else { 2820 rc = 0; /* A = B */ 2821 } 2822 2823 if(rc==0) { 2824 rc = uprv_stricmp(aa->locale, bb->locale); 2825 } 2826 2827#if defined(ULOC_DEBUG) 2828 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2829 aa->locale, aa->q, 2830 bb->locale, bb->q, 2831 rc);*/ 2832#endif 2833 2834 return rc; 2835} 2836 2837static ULayoutType 2838_uloc_getOrientationHelper(const char* localeId, 2839 const char* key, 2840 UErrorCode *status) 2841{ 2842 ULayoutType result = ULOC_LAYOUT_UNKNOWN; 2843 2844 if (!U_FAILURE(*status)) { 2845 int32_t length = 0; 2846 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 2847 2848 uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status); 2849 2850 if (!U_FAILURE(*status)) { 2851 const UChar* const value = 2852 _res_getTableStringWithFallback( 2853 NULL, 2854 localeBuffer, 2855 "layout", 2856 NULL, 2857 key, 2858 &length, 2859 status); 2860 2861 if 
(!U_FAILURE(*status) && length != 0) { 2862 switch(value[0]) 2863 { 2864 case 0x0062: /* 'b' */ 2865 result = ULOC_LAYOUT_BTT; 2866 break; 2867 case 0x006C: /* 'l' */ 2868 result = ULOC_LAYOUT_LTR; 2869 break; 2870 case 0x0072: /* 'r' */ 2871 result = ULOC_LAYOUT_RTL; 2872 break; 2873 case 0x0074: /* 't' */ 2874 result = ULOC_LAYOUT_TTB; 2875 break; 2876 default: 2877 *status = U_INTERNAL_PROGRAM_ERROR; 2878 break; 2879 } 2880 } 2881 } 2882 } 2883 2884 return result; 2885} 2886 2887U_DRAFT ULayoutType U_EXPORT2 2888uloc_getCharacterOrientation(const char* localeId, 2889 UErrorCode *status) 2890{ 2891 return _uloc_getOrientationHelper(localeId, "characters", status); 2892} 2893 2894/** 2895 * Get the layout line orientation for the specified locale. 2896 * 2897 * @param localeID locale name 2898 * @param status Error status 2899 * @return an enum indicating the layout orientation for lines. 2900 * @stable ICU 4.0 2901 */ 2902U_DRAFT ULayoutType U_EXPORT2 2903uloc_getLineOrientation(const char* localeId, 2904 UErrorCode *status) 2905{ 2906 return _uloc_getOrientationHelper(localeId, "lines", status); 2907} 2908 2909/* 2910mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2911*/ 2912 2913U_CAPI int32_t U_EXPORT2 2914uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2915 const char *httpAcceptLanguage, 2916 UEnumeration* availableLocales, 2917 UErrorCode *status) 2918{ 2919 _acceptLangItem *j; 2920 _acceptLangItem smallBuffer[30]; 2921 char **strs; 2922 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2923 int32_t n = 0; 2924 const char *itemEnd; 2925 const char *paramEnd; 2926 const char *s; 2927 const char *t; 2928 int32_t res; 2929 int32_t i; 2930 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2931 int32_t jSize; 2932 char *tempstr; /* Use for null 
pointer check */ 2933 2934 j = smallBuffer; 2935 jSize = sizeof(smallBuffer)/sizeof(smallBuffer[0]); 2936 if(U_FAILURE(*status)) { 2937 return -1; 2938 } 2939 2940 for(s=httpAcceptLanguage;s&&*s;) { 2941 while(isspace(*s)) /* eat space at the beginning */ 2942 s++; 2943 itemEnd=uprv_strchr(s,','); 2944 paramEnd=uprv_strchr(s,';'); 2945 if(!itemEnd) { 2946 itemEnd = httpAcceptLanguage+l; /* end of string */ 2947 } 2948 if(paramEnd && paramEnd<itemEnd) { 2949 /* semicolon (;) is closer than end (,) */ 2950 t = paramEnd+1; 2951 if(*t=='q') { 2952 t++; 2953 } 2954 while(isspace(*t)) { 2955 t++; 2956 } 2957 if(*t=='=') { 2958 t++; 2959 } 2960 while(isspace(*t)) { 2961 t++; 2962 } 2963 j[n].q = (float)_uloc_strtod(t,NULL); 2964 } else { 2965 /* no semicolon - it's 1.0 */ 2966 j[n].q = 1.0f; 2967 paramEnd = itemEnd; 2968 } 2969 j[n].dummy=0; 2970 /* eat spaces prior to semi */ 2971 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2972 ; 2973 /* Check for null pointer from uprv_strndup */ 2974 tempstr = uprv_strndup(s,(int32_t)((t+1)-s)); 2975 if (tempstr == NULL) { 2976 *status = U_MEMORY_ALLOCATION_ERROR; 2977 return -1; 2978 } 2979 j[n].locale = tempstr; 2980 uloc_canonicalize(j[n].locale,tmp,sizeof(tmp)/sizeof(tmp[0]),status); 2981 if(strcmp(j[n].locale,tmp)) { 2982 uprv_free(j[n].locale); 2983 j[n].locale=uprv_strdup(tmp); 2984 } 2985#if defined(ULOC_DEBUG) 2986 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2987#endif 2988 n++; 2989 s = itemEnd; 2990 while(*s==',') { /* eat duplicate commas */ 2991 s++; 2992 } 2993 if(n>=jSize) { 2994 if(j==smallBuffer) { /* overflowed the small buffer. 
*/ 2995 j = uprv_malloc(sizeof(j[0])*(jSize*2)); 2996 if(j!=NULL) { 2997 uprv_memcpy(j,smallBuffer,sizeof(j[0])*jSize); 2998 } 2999#if defined(ULOC_DEBUG) 3000 fprintf(stderr,"malloced at size %d\n", jSize); 3001#endif 3002 } else { 3003 j = uprv_realloc(j, sizeof(j[0])*jSize*2); 3004#if defined(ULOC_DEBUG) 3005 fprintf(stderr,"re-alloced at size %d\n", jSize); 3006#endif 3007 } 3008 jSize *= 2; 3009 if(j==NULL) { 3010 *status = U_MEMORY_ALLOCATION_ERROR; 3011 return -1; 3012 } 3013 } 3014 } 3015 uprv_sortArray(j, n, sizeof(j[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 3016 if(U_FAILURE(*status)) { 3017 if(j != smallBuffer) { 3018#if defined(ULOC_DEBUG) 3019 fprintf(stderr,"freeing j %p\n", j); 3020#endif 3021 uprv_free(j); 3022 } 3023 return -1; 3024 } 3025 strs = uprv_malloc((size_t)(sizeof(strs[0])*n)); 3026 /* Check for null pointer */ 3027 if (strs == NULL) { 3028 uprv_free(j); /* Free to avoid memory leak */ 3029 *status = U_MEMORY_ALLOCATION_ERROR; 3030 return -1; 3031 } 3032 for(i=0;i<n;i++) { 3033#if defined(ULOC_DEBUG) 3034 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 3035#endif 3036 strs[i]=j[i].locale; 3037 } 3038 res = uloc_acceptLanguage(result, resultAvailable, outResult, 3039 (const char**)strs, n, availableLocales, status); 3040 for(i=0;i<n;i++) { 3041 uprv_free(strs[i]); 3042 } 3043 uprv_free(strs); 3044 if(j != smallBuffer) { 3045#if defined(ULOC_DEBUG) 3046 fprintf(stderr,"freeing j %p\n", j); 3047#endif 3048 uprv_free(j); 3049 } 3050 return res; 3051} 3052 3053 3054U_CAPI int32_t U_EXPORT2 3055uloc_acceptLanguage(char *result, int32_t resultAvailable, 3056 UAcceptResult *outResult, const char **acceptList, 3057 int32_t acceptListCount, 3058 UEnumeration* availableLocales, 3059 UErrorCode *status) 3060{ 3061 int32_t i,j; 3062 int32_t len; 3063 int32_t maxLen=0; 3064 char tmp[ULOC_FULLNAME_CAPACITY+1]; 3065 const char *l; 3066 char **fallbackList; 3067 if(U_FAILURE(*status)) { 3068 return -1; 3069 } 3070 
fallbackList = uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)); 3071 if(fallbackList==NULL) { 3072 *status = U_MEMORY_ALLOCATION_ERROR; 3073 return -1; 3074 } 3075 for(i=0;i<acceptListCount;i++) { 3076#if defined(ULOC_DEBUG) 3077 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 3078#endif 3079 while((l=uenum_next(availableLocales, NULL, status))) { 3080#if defined(ULOC_DEBUG) 3081 fprintf(stderr," %s\n", l); 3082#endif 3083 len = (int32_t)uprv_strlen(l); 3084 if(!uprv_strcmp(acceptList[i], l)) { 3085 if(outResult) { 3086 *outResult = ULOC_ACCEPT_VALID; 3087 } 3088#if defined(ULOC_DEBUG) 3089 fprintf(stderr, "MATCH! %s\n", l); 3090#endif 3091 if(len>0) { 3092 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 3093 } 3094 for(j=0;j<i;j++) { 3095 uprv_free(fallbackList[j]); 3096 } 3097 uprv_free(fallbackList); 3098 return u_terminateChars(result, resultAvailable, len, status); 3099 } 3100 if(len>maxLen) { 3101 maxLen = len; 3102 } 3103 } 3104 uenum_reset(availableLocales, status); 3105 /* save off parent info */ 3106 if(uloc_getParent(acceptList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 3107 fallbackList[i] = uprv_strdup(tmp); 3108 } else { 3109 fallbackList[i]=0; 3110 } 3111 } 3112 3113 for(maxLen--;maxLen>0;maxLen--) { 3114 for(i=0;i<acceptListCount;i++) { 3115 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 3116#if defined(ULOC_DEBUG) 3117 fprintf(stderr,"Try: [%s]", fallbackList[i]); 3118#endif 3119 while((l=uenum_next(availableLocales, NULL, status))) { 3120#if defined(ULOC_DEBUG) 3121 fprintf(stderr," %s\n", l); 3122#endif 3123 len = (int32_t)uprv_strlen(l); 3124 if(!uprv_strcmp(fallbackList[i], l)) { 3125 if(outResult) { 3126 *outResult = ULOC_ACCEPT_FALLBACK; 3127 } 3128#if defined(ULOC_DEBUG) 3129 fprintf(stderr, "fallback MATCH! 
%s\n", l); 3130#endif 3131 if(len>0) { 3132 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 3133 } 3134 for(j=0;j<acceptListCount;j++) { 3135 uprv_free(fallbackList[j]); 3136 } 3137 uprv_free(fallbackList); 3138 return u_terminateChars(result, resultAvailable, len, status); 3139 } 3140 } 3141 uenum_reset(availableLocales, status); 3142 3143 if(uloc_getParent(fallbackList[i], tmp, sizeof(tmp)/sizeof(tmp[0]), status)!=0) { 3144 uprv_free(fallbackList[i]); 3145 fallbackList[i] = uprv_strdup(tmp); 3146 } else { 3147 uprv_free(fallbackList[i]); 3148 fallbackList[i]=0; 3149 } 3150 } 3151 } 3152 if(outResult) { 3153 *outResult = ULOC_ACCEPT_FAILED; 3154 } 3155 } 3156 for(i=0;i<acceptListCount;i++) { 3157 uprv_free(fallbackList[i]); 3158 } 3159 uprv_free(fallbackList); 3160 return -1; 3161} 3162 3163 3164/** 3165 * This function looks for the localeID in the likelySubtags resource. 3166 * 3167 * @param localeID The tag to find. 3168 * @param buffer A buffer to hold the matching entry 3169 * @param bufferLength The length of the output buffer 3170 * @return A pointer to "buffer" if found, or a null pointer if not. 3171 */ 3172static const char* U_CALLCONV 3173findLikelySubtags(const char* localeID, 3174 char* buffer, 3175 int32_t bufferLength, 3176 UErrorCode* err) { 3177 const char* result = NULL; 3178 3179 if (!U_FAILURE(*err)) { 3180 int32_t resLen = 0; 3181 const UChar* s = NULL; 3182 UErrorCode tmpErr = U_ZERO_ERROR; 3183 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr); 3184 if (U_SUCCESS(tmpErr)) { 3185 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr); 3186 3187 if (U_FAILURE(tmpErr)) { 3188 /* 3189 * If a resource is missing, it's not really an error, it's 3190 * just that we don't have any data for that particular locale ID. 3191 */ 3192 if (tmpErr != U_MISSING_RESOURCE_ERROR) { 3193 *err = tmpErr; 3194 } 3195 } 3196 else if (resLen >= bufferLength) { 3197 /* The buffer should never overflow. 
*/ 3198 *err = U_INTERNAL_PROGRAM_ERROR; 3199 } 3200 else { 3201 u_UCharsToChars(s, buffer, resLen + 1); 3202 result = buffer; 3203 } 3204 3205 ures_close(subtags); 3206 } else { 3207 *err = tmpErr; 3208 } 3209 } 3210 3211 return result; 3212} 3213 3214/** 3215 * Append a tag to a buffer, adding the separator if necessary. The buffer 3216 * must be large enough to contain the resulting tag plus any separator 3217 * necessary. The tag must not be a zero-length string. 3218 * 3219 * @param tag The tag to add. 3220 * @param tagLength The length of the tag. 3221 * @param buffer The output buffer. 3222 * @param bufferLength The length of the output buffer. This is an input/ouput parameter. 3223 **/ 3224static void U_CALLCONV 3225appendTag( 3226 const char* tag, 3227 int32_t tagLength, 3228 char* buffer, 3229 int32_t* bufferLength) { 3230 3231 if (*bufferLength > 0) { 3232 buffer[*bufferLength] = '_'; 3233 ++(*bufferLength); 3234 } 3235 3236 uprv_memmove( 3237 &buffer[*bufferLength], 3238 tag, 3239 tagLength); 3240 3241 *bufferLength += tagLength; 3242} 3243 3244/** 3245 * These are the canonical strings for unknown languages, scripts and regions. 3246 **/ 3247static const char* const unknownLanguage = "und"; 3248static const char* const unknownScript = "Zzzz"; 3249static const char* const unknownRegion = "ZZ"; 3250 3251/** 3252 * Create a tag string from the supplied parameters. The lang, script and region 3253 * parameters may be NULL pointers. If they are, their corresponding length parameters 3254 * must be less than or equal to 0. 3255 * 3256 * If any of the language, script or region parameters are empty, and the alternateTags 3257 * parameter is not NULL, it will be parsed for potential language, script and region tags 3258 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or 3259 * it contains no language tag, the default tag for the unknown language is used. 
3260 * 3261 * If the length of the new string exceeds the capacity of the output buffer, 3262 * the function copies as many bytes to the output buffer as it can, and returns 3263 * the error U_BUFFER_OVERFLOW_ERROR. 3264 * 3265 * If an illegal argument is provided, the function returns the error 3266 * U_ILLEGAL_ARGUMENT_ERROR. 3267 * 3268 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if 3269 * the tag string fits in the output buffer, but the null terminator doesn't. 3270 * 3271 * @param lang The language tag to use. 3272 * @param langLength The length of the language tag. 3273 * @param script The script tag to use. 3274 * @param scriptLength The length of the script tag. 3275 * @param region The region tag to use. 3276 * @param regionLength The length of the region tag. 3277 * @param trailing Any trailing data to append to the new tag. 3278 * @param trailingLength The length of the trailing data. 3279 * @param alternateTags A string containing any alternate tags. 3280 * @param tag The output buffer. 3281 * @param tagCapacity The capacity of the output buffer. 3282 * @param err A pointer to a UErrorCode for error reporting. 3283 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error. 
3284 **/ 3285static int32_t U_CALLCONV 3286createTagStringWithAlternates( 3287 const char* lang, 3288 int32_t langLength, 3289 const char* script, 3290 int32_t scriptLength, 3291 const char* region, 3292 int32_t regionLength, 3293 const char* trailing, 3294 int32_t trailingLength, 3295 const char* alternateTags, 3296 char* tag, 3297 int32_t tagCapacity, 3298 UErrorCode* err) { 3299 3300 if (U_FAILURE(*err)) { 3301 goto error; 3302 } 3303 else if (tag == NULL || 3304 tagCapacity <= 0 || 3305 langLength >= ULOC_LANG_CAPACITY || 3306 scriptLength >= ULOC_SCRIPT_CAPACITY || 3307 regionLength >= ULOC_COUNTRY_CAPACITY) { 3308 goto error; 3309 } 3310 else { 3311 /** 3312 * ULOC_FULLNAME_CAPACITY will provide enough capacity 3313 * that we can build a string that contains the language, 3314 * script and region code without worrying about overrunning 3315 * the user-supplied buffer. 3316 **/ 3317 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 3318 int32_t tagLength = 0; 3319 int32_t capacityRemaining = tagCapacity; 3320 UBool regionAppended = FALSE; 3321 3322 if (langLength > 0) { 3323 appendTag( 3324 lang, 3325 langLength, 3326 tagBuffer, 3327 &tagLength); 3328 } 3329 else if (alternateTags == NULL) { 3330 /* 3331 * Append the value for an unknown language, if 3332 * we found no language. 3333 */ 3334 appendTag( 3335 unknownLanguage, 3336 uprv_strlen(unknownLanguage), 3337 tagBuffer, 3338 &tagLength); 3339 } 3340 else { 3341 /* 3342 * Parse the alternateTags string for the language. 3343 */ 3344 char alternateLang[ULOC_LANG_CAPACITY]; 3345 int32_t alternateLangLength = sizeof(alternateLang); 3346 3347 alternateLangLength = 3348 uloc_getLanguage( 3349 alternateTags, 3350 alternateLang, 3351 alternateLangLength, 3352 err); 3353 if(U_FAILURE(*err) || 3354 alternateLangLength >= ULOC_LANG_CAPACITY) { 3355 goto error; 3356 } 3357 else if (alternateLangLength == 0) { 3358 /* 3359 * Append the value for an unknown language, if 3360 * we found no language. 
3361 */ 3362 appendTag( 3363 unknownLanguage, 3364 uprv_strlen(unknownLanguage), 3365 tagBuffer, 3366 &tagLength); 3367 } 3368 else { 3369 appendTag( 3370 alternateLang, 3371 alternateLangLength, 3372 tagBuffer, 3373 &tagLength); 3374 } 3375 } 3376 3377 if (scriptLength > 0) { 3378 appendTag( 3379 script, 3380 scriptLength, 3381 tagBuffer, 3382 &tagLength); 3383 } 3384 else if (alternateTags != NULL) { 3385 /* 3386 * Parse the alternateTags string for the script. 3387 */ 3388 char alternateScript[ULOC_SCRIPT_CAPACITY]; 3389 3390 const int32_t alternateScriptLength = 3391 uloc_getScript( 3392 alternateTags, 3393 alternateScript, 3394 sizeof(alternateScript), 3395 err); 3396 3397 if (U_FAILURE(*err) || 3398 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { 3399 goto error; 3400 } 3401 else if (alternateScriptLength > 0) { 3402 appendTag( 3403 alternateScript, 3404 alternateScriptLength, 3405 tagBuffer, 3406 &tagLength); 3407 } 3408 } 3409 3410 if (regionLength > 0) { 3411 appendTag( 3412 region, 3413 regionLength, 3414 tagBuffer, 3415 &tagLength); 3416 3417 regionAppended = TRUE; 3418 } 3419 else if (alternateTags != NULL) { 3420 /* 3421 * Parse the alternateTags string for the region. 3422 */ 3423 char alternateRegion[ULOC_COUNTRY_CAPACITY]; 3424 3425 const int32_t alternateRegionLength = 3426 uloc_getCountry( 3427 alternateTags, 3428 alternateRegion, 3429 sizeof(alternateRegion), 3430 err); 3431 if (U_FAILURE(*err) || 3432 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { 3433 goto error; 3434 } 3435 else if (alternateRegionLength > 0) { 3436 appendTag( 3437 alternateRegion, 3438 alternateRegionLength, 3439 tagBuffer, 3440 &tagLength); 3441 3442 regionAppended = TRUE; 3443 } 3444 } 3445 3446 { 3447 const int32_t toCopy = 3448 tagLength >= tagCapacity ? tagCapacity : tagLength; 3449 3450 /** 3451 * Copy the partial tag from our internal buffer to the supplied 3452 * target. 
3453 **/ 3454 uprv_memcpy( 3455 tag, 3456 tagBuffer, 3457 toCopy); 3458 3459 capacityRemaining -= toCopy; 3460 } 3461 3462 if (trailingLength > 0) { 3463 if (capacityRemaining > 0 && !regionAppended) { 3464 tag[tagLength++] = '_'; 3465 --capacityRemaining; 3466 } 3467 3468 if (capacityRemaining > 0) { 3469 /* 3470 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we 3471 * don't know if the user-supplied buffers overlap. 3472 */ 3473 const int32_t toCopy = 3474 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength; 3475 3476 uprv_memmove( 3477 &tag[tagLength], 3478 trailing, 3479 toCopy); 3480 } 3481 } 3482 3483 tagLength += trailingLength; 3484 3485 return u_terminateChars( 3486 tag, 3487 tagCapacity, 3488 tagLength, 3489 err); 3490 } 3491 3492error: 3493 3494 /** 3495 * An overflow indicates the locale ID passed in 3496 * is ill-formed. If we got here, and there was 3497 * no previous error, it's an implicit overflow. 3498 **/ 3499 if (*err == U_BUFFER_OVERFLOW_ERROR || 3500 U_SUCCESS(*err)) { 3501 *err = U_ILLEGAL_ARGUMENT_ERROR; 3502 } 3503 3504 return -1; 3505} 3506 3507/** 3508 * Create a tag string from the supplied parameters. The lang, script and region 3509 * parameters may be NULL pointers. If they are, their corresponding length parameters 3510 * must be less than or equal to 0. If the lang parameter is an empty string, the 3511 * default value for an unknown language is written to the output buffer. 3512 * 3513 * If the length of the new string exceeds the capacity of the output buffer, 3514 * the function copies as many bytes to the output buffer as it can, and returns 3515 * the error U_BUFFER_OVERFLOW_ERROR. 3516 * 3517 * If an illegal argument is provided, the function returns the error 3518 * U_ILLEGAL_ARGUMENT_ERROR. 3519 * 3520 * @param lang The language tag to use. 3521 * @param langLength The length of the language tag. 3522 * @param script The script tag to use. 
3523 * @param scriptLength The length of the script tag. 3524 * @param region The region tag to use. 3525 * @param regionLength The length of the region tag. 3526 * @param trailing Any trailing data to append to the new tag. 3527 * @param trailingLength The length of the trailing data. 3528 * @param tag The output buffer. 3529 * @param tagCapacity The capacity of the output buffer. 3530 * @param err A pointer to a UErrorCode for error reporting. 3531 * @return The length of the tag string, which may be greater than tagCapacity. 3532 **/ 3533static int32_t U_CALLCONV 3534createTagString( 3535 const char* lang, 3536 int32_t langLength, 3537 const char* script, 3538 int32_t scriptLength, 3539 const char* region, 3540 int32_t regionLength, 3541 const char* trailing, 3542 int32_t trailingLength, 3543 char* tag, 3544 int32_t tagCapacity, 3545 UErrorCode* err) 3546{ 3547 return createTagStringWithAlternates( 3548 lang, 3549 langLength, 3550 script, 3551 scriptLength, 3552 region, 3553 regionLength, 3554 trailing, 3555 trailingLength, 3556 NULL, 3557 tag, 3558 tagCapacity, 3559 err); 3560} 3561 3562/** 3563 * Parse the language, script, and region subtags from a tag string, and copy the 3564 * results into the corresponding output parameters. The buffers are null-terminated, 3565 * unless overflow occurs. 3566 * 3567 * The langLength, scriptLength, and regionLength parameters are input/output 3568 * parameters, and must contain the capacity of their corresponding buffers on 3569 * input. On output, they will contain the actual length of the buffers, not 3570 * including the null terminator. 3571 * 3572 * If the length of any of the output subtags exceeds the capacity of the corresponding 3573 * buffer, the function copies as many bytes to the output buffer as it can, and returns 3574 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow 3575 * occurs. 
3576 * 3577 * If an illegal argument is provided, the function returns the error 3578 * U_ILLEGAL_ARGUMENT_ERROR. 3579 * 3580 * @param localeID The locale ID to parse. 3581 * @param lang The language tag buffer. 3582 * @param langLength The length of the language tag. 3583 * @param script The script tag buffer. 3584 * @param scriptLength The length of the script tag. 3585 * @param region The region tag buffer. 3586 * @param regionLength The length of the region tag. 3587 * @param err A pointer to a UErrorCode for error reporting. 3588 * @return The number of chars of the localeID parameter consumed. 3589 **/ 3590static int32_t U_CALLCONV 3591parseTagString( 3592 const char* localeID, 3593 char* lang, 3594 int32_t* langLength, 3595 char* script, 3596 int32_t* scriptLength, 3597 char* region, 3598 int32_t* regionLength, 3599 UErrorCode* err) 3600{ 3601 const char* position = localeID; 3602 int32_t subtagLength = 0; 3603 3604 if(U_FAILURE(*err) || 3605 localeID == NULL || 3606 lang == NULL || 3607 langLength == NULL || 3608 script == NULL || 3609 scriptLength == NULL || 3610 region == NULL || 3611 regionLength == NULL) { 3612 goto error; 3613 } 3614 3615 subtagLength = _getLanguage(position, lang, *langLength, &position); 3616 u_terminateChars(lang, *langLength, subtagLength, err); 3617 3618 /* 3619 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING 3620 * to be an error, because it indicates the user-supplied tag is 3621 * not well-formed. 3622 */ 3623 if(U_FAILURE(*err)) { 3624 goto error; 3625 } 3626 3627 *langLength = subtagLength; 3628 3629 /* 3630 * If no language was present, use the value of unknownLanguage 3631 * instead. Otherwise, move past any separator. 
3632 */ 3633 if (*langLength == 0) { 3634 uprv_strcpy( 3635 lang, 3636 unknownLanguage); 3637 *langLength = uprv_strlen(lang); 3638 } 3639 else if (_isIDSeparator(*position)) { 3640 ++position; 3641 } 3642 3643 subtagLength = _getScript(position, script, *scriptLength, &position); 3644 u_terminateChars(script, *scriptLength, subtagLength, err); 3645 3646 if(U_FAILURE(*err)) { 3647 goto error; 3648 } 3649 3650 *scriptLength = subtagLength; 3651 3652 if (*scriptLength > 0) { 3653 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { 3654 /** 3655 * If the script part is the "unknown" script, then don't return it. 3656 **/ 3657 *scriptLength = 0; 3658 } 3659 3660 /* 3661 * Move past any separator. 3662 */ 3663 if (_isIDSeparator(*position)) { 3664 ++position; 3665 } 3666 } 3667 3668 subtagLength = _getCountry(position, region, *regionLength, &position); 3669 u_terminateChars(region, *regionLength, subtagLength, err); 3670 3671 if(U_FAILURE(*err)) { 3672 goto error; 3673 } 3674 3675 *regionLength = subtagLength; 3676 3677 if (*regionLength > 0) { 3678 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { 3679 /** 3680 * If the region part is the "unknown" region, then don't return it. 3681 **/ 3682 *regionLength = 0; 3683 } 3684 } 3685 3686exit: 3687 3688 return (int32_t)(position - localeID); 3689 3690error: 3691 3692 /** 3693 * If we get here, we have no explicit error, it's the result of an 3694 * illegal argument. 
3695 **/ 3696 if (!U_FAILURE(*err)) { 3697 *err = U_ILLEGAL_ARGUMENT_ERROR; 3698 } 3699 3700 goto exit; 3701} 3702 3703static int32_t U_CALLCONV 3704createLikelySubtagsString( 3705 const char* lang, 3706 int32_t langLength, 3707 const char* script, 3708 int32_t scriptLength, 3709 const char* region, 3710 int32_t regionLength, 3711 const char* variants, 3712 int32_t variantsLength, 3713 char* tag, 3714 int32_t tagCapacity, 3715 UErrorCode* err) 3716{ 3717 /** 3718 * ULOC_FULLNAME_CAPACITY will provide enough capacity 3719 * that we can build a string that contains the language, 3720 * script and region code without worrying about overrunning 3721 * the user-supplied buffer. 3722 **/ 3723 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 3724 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; 3725 int32_t tagBufferLength = 0; 3726 3727 if(U_FAILURE(*err)) { 3728 goto error; 3729 } 3730 3731 /** 3732 * Try the language with the script and region first. 3733 **/ 3734 if (scriptLength > 0 && regionLength > 0) { 3735 3736 const char* likelySubtags = NULL; 3737 3738 tagBufferLength = createTagString( 3739 lang, 3740 langLength, 3741 script, 3742 scriptLength, 3743 region, 3744 regionLength, 3745 NULL, 3746 0, 3747 tagBuffer, 3748 sizeof(tagBuffer), 3749 err); 3750 if(U_FAILURE(*err)) { 3751 goto error; 3752 } 3753 3754 likelySubtags = 3755 findLikelySubtags( 3756 tagBuffer, 3757 likelySubtagsBuffer, 3758 sizeof(likelySubtagsBuffer), 3759 err); 3760 if(U_FAILURE(*err)) { 3761 goto error; 3762 } 3763 3764 if (likelySubtags != NULL) { 3765 /* Always use the language tag from the 3766 maximal string, since it may be more 3767 specific than the one provided. */ 3768 return createTagStringWithAlternates( 3769 NULL, 3770 0, 3771 NULL, 3772 0, 3773 NULL, 3774 0, 3775 variants, 3776 variantsLength, 3777 likelySubtags, 3778 tag, 3779 tagCapacity, 3780 err); 3781 } 3782 } 3783 3784 /** 3785 * Try the language with just the script. 
3786 **/ 3787 if (scriptLength > 0) { 3788 3789 const char* likelySubtags = NULL; 3790 3791 tagBufferLength = createTagString( 3792 lang, 3793 langLength, 3794 script, 3795 scriptLength, 3796 NULL, 3797 0, 3798 NULL, 3799 0, 3800 tagBuffer, 3801 sizeof(tagBuffer), 3802 err); 3803 if(U_FAILURE(*err)) { 3804 goto error; 3805 } 3806 3807 likelySubtags = 3808 findLikelySubtags( 3809 tagBuffer, 3810 likelySubtagsBuffer, 3811 sizeof(likelySubtagsBuffer), 3812 err); 3813 if(U_FAILURE(*err)) { 3814 goto error; 3815 } 3816 3817 if (likelySubtags != NULL) { 3818 /* Always use the language tag from the 3819 maximal string, since it may be more 3820 specific than the one provided. */ 3821 return createTagStringWithAlternates( 3822 NULL, 3823 0, 3824 NULL, 3825 0, 3826 region, 3827 regionLength, 3828 variants, 3829 variantsLength, 3830 likelySubtags, 3831 tag, 3832 tagCapacity, 3833 err); 3834 } 3835 } 3836 3837 /** 3838 * Try the language with just the region. 3839 **/ 3840 if (regionLength > 0) { 3841 3842 const char* likelySubtags = NULL; 3843 3844 createTagString( 3845 lang, 3846 langLength, 3847 NULL, 3848 0, 3849 region, 3850 regionLength, 3851 NULL, 3852 0, 3853 tagBuffer, 3854 sizeof(tagBuffer), 3855 err); 3856 if(U_FAILURE(*err)) { 3857 goto error; 3858 } 3859 3860 likelySubtags = 3861 findLikelySubtags( 3862 tagBuffer, 3863 likelySubtagsBuffer, 3864 sizeof(likelySubtagsBuffer), 3865 err); 3866 if(U_FAILURE(*err)) { 3867 goto error; 3868 } 3869 3870 if (likelySubtags != NULL) { 3871 /* Always use the language tag from the 3872 maximal string, since it may be more 3873 specific than the one provided. */ 3874 return createTagStringWithAlternates( 3875 NULL, 3876 0, 3877 script, 3878 scriptLength, 3879 NULL, 3880 0, 3881 variants, 3882 variantsLength, 3883 likelySubtags, 3884 tag, 3885 tagCapacity, 3886 err); 3887 } 3888 } 3889 3890 /** 3891 * Finally, try just the language. 
3892 **/ 3893 { 3894 const char* likelySubtags = NULL; 3895 3896 createTagString( 3897 lang, 3898 langLength, 3899 NULL, 3900 0, 3901 NULL, 3902 0, 3903 NULL, 3904 0, 3905 tagBuffer, 3906 sizeof(tagBuffer), 3907 err); 3908 if(U_FAILURE(*err)) { 3909 goto error; 3910 } 3911 3912 likelySubtags = 3913 findLikelySubtags( 3914 tagBuffer, 3915 likelySubtagsBuffer, 3916 sizeof(likelySubtagsBuffer), 3917 err); 3918 if(U_FAILURE(*err)) { 3919 goto error; 3920 } 3921 3922 if (likelySubtags != NULL) { 3923 /* Always use the language tag from the 3924 maximal string, since it may be more 3925 specific than the one provided. */ 3926 return createTagStringWithAlternates( 3927 NULL, 3928 0, 3929 script, 3930 scriptLength, 3931 region, 3932 regionLength, 3933 variants, 3934 variantsLength, 3935 likelySubtags, 3936 tag, 3937 tagCapacity, 3938 err); 3939 } 3940 } 3941 3942 return u_terminateChars( 3943 tag, 3944 tagCapacity, 3945 0, 3946 err); 3947 3948error: 3949 3950 if (!U_FAILURE(*err)) { 3951 *err = U_ILLEGAL_ARGUMENT_ERROR; 3952 } 3953 3954 return -1; 3955} 3956 3957static int32_t 3958_uloc_addLikelySubtags(const char* localeID, 3959 char* maximizedLocaleID, 3960 int32_t maximizedLocaleIDCapacity, 3961 UErrorCode* err) 3962{ 3963 char lang[ULOC_LANG_CAPACITY]; 3964 int32_t langLength = sizeof(lang); 3965 char script[ULOC_SCRIPT_CAPACITY]; 3966 int32_t scriptLength = sizeof(script); 3967 char region[ULOC_COUNTRY_CAPACITY]; 3968 int32_t regionLength = sizeof(region); 3969 const char* trailing = ""; 3970 int32_t trailingLength = 0; 3971 int32_t trailingIndex = 0; 3972 int32_t resultLength = 0; 3973 3974 if(U_FAILURE(*err)) { 3975 goto error; 3976 } 3977 else if (localeID == NULL || 3978 maximizedLocaleID == NULL || 3979 maximizedLocaleIDCapacity <= 0) { 3980 goto error; 3981 } 3982 3983 trailingIndex = parseTagString( 3984 localeID, 3985 lang, 3986 &langLength, 3987 script, 3988 &scriptLength, 3989 region, 3990 ®ionLength, 3991 err); 3992 if(U_FAILURE(*err)) { 3993 /* Overflow 
indicates an illegal argument error */ 3994 if (*err == U_BUFFER_OVERFLOW_ERROR) { 3995 *err = U_ILLEGAL_ARGUMENT_ERROR; 3996 } 3997 3998 goto error; 3999 } 4000 4001 /* Find the length of the trailing portion. */ 4002 trailing = &localeID[trailingIndex]; 4003 trailingLength = uprv_strlen(trailing); 4004 4005 resultLength = 4006 createLikelySubtagsString( 4007 lang, 4008 langLength, 4009 script, 4010 scriptLength, 4011 region, 4012 regionLength, 4013 trailing, 4014 trailingLength, 4015 maximizedLocaleID, 4016 maximizedLocaleIDCapacity, 4017 err); 4018 4019 if (resultLength == 0) { 4020 const int32_t localIDLength = 4021 uprv_strlen(localeID); 4022 4023 /* 4024 * If we get here, we need to return localeID. 4025 */ 4026 uprv_memcpy( 4027 maximizedLocaleID, 4028 localeID, 4029 localIDLength <= maximizedLocaleIDCapacity ? 4030 localIDLength : maximizedLocaleIDCapacity); 4031 4032 resultLength = 4033 u_terminateChars( 4034 maximizedLocaleID, 4035 maximizedLocaleIDCapacity, 4036 localIDLength, 4037 err); 4038 } 4039 4040 return resultLength; 4041 4042error: 4043 4044 if (!U_FAILURE(*err)) { 4045 *err = U_ILLEGAL_ARGUMENT_ERROR; 4046 } 4047 4048 return -1; 4049} 4050 4051static int32_t 4052_uloc_minimizeSubtags(const char* localeID, 4053 char* minimizedLocaleID, 4054 int32_t minimizedLocaleIDCapacity, 4055 UErrorCode* err) 4056{ 4057 /** 4058 * ULOC_FULLNAME_CAPACITY will provide enough capacity 4059 * that we can build a string that contains the language, 4060 * script and region code without worrying about overrunning 4061 * the user-supplied buffer. 
4062 **/ 4063 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY]; 4064 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer); 4065 4066 char lang[ULOC_LANG_CAPACITY]; 4067 int32_t langLength = sizeof(lang); 4068 char script[ULOC_SCRIPT_CAPACITY]; 4069 int32_t scriptLength = sizeof(script); 4070 char region[ULOC_COUNTRY_CAPACITY]; 4071 int32_t regionLength = sizeof(region); 4072 const char* trailing = ""; 4073 int32_t trailingLength = 0; 4074 int32_t trailingIndex = 0; 4075 4076 if(U_FAILURE(*err)) { 4077 goto error; 4078 } 4079 else if (localeID == NULL || 4080 minimizedLocaleID == NULL || 4081 minimizedLocaleIDCapacity <= 0) { 4082 goto error; 4083 } 4084 4085 trailingIndex = 4086 parseTagString( 4087 localeID, 4088 lang, 4089 &langLength, 4090 script, 4091 &scriptLength, 4092 region, 4093 ®ionLength, 4094 err); 4095 if(U_FAILURE(*err)) { 4096 4097 /* Overflow indicates an illegal argument error */ 4098 if (*err == U_BUFFER_OVERFLOW_ERROR) { 4099 *err = U_ILLEGAL_ARGUMENT_ERROR; 4100 } 4101 4102 goto error; 4103 } 4104 4105 /* Find the spot where the variants begin, if any. */ 4106 trailing = &localeID[trailingIndex]; 4107 trailingLength = uprv_strlen(trailing); 4108 4109 createTagString( 4110 lang, 4111 langLength, 4112 script, 4113 scriptLength, 4114 region, 4115 regionLength, 4116 NULL, 4117 0, 4118 maximizedTagBuffer, 4119 maximizedTagBufferLength, 4120 err); 4121 if(U_FAILURE(*err)) { 4122 goto error; 4123 } 4124 4125 /** 4126 * First, we need to first get the maximization 4127 * from AddLikelySubtags. 4128 **/ 4129 maximizedTagBufferLength = 4130 uloc_addLikelySubtags( 4131 maximizedTagBuffer, 4132 maximizedTagBuffer, 4133 maximizedTagBufferLength, 4134 err); 4135 4136 if(U_FAILURE(*err)) { 4137 goto error; 4138 } 4139 4140 /** 4141 * Start first with just the language. 
4142 **/ 4143 { 4144 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 4145 4146 const int32_t tagBufferLength = 4147 createLikelySubtagsString( 4148 lang, 4149 langLength, 4150 NULL, 4151 0, 4152 NULL, 4153 0, 4154 NULL, 4155 0, 4156 tagBuffer, 4157 sizeof(tagBuffer), 4158 err); 4159 4160 if(U_FAILURE(*err)) { 4161 goto error; 4162 } 4163 else if (uprv_strnicmp( 4164 maximizedTagBuffer, 4165 tagBuffer, 4166 tagBufferLength) == 0) { 4167 4168 return createTagString( 4169 lang, 4170 langLength, 4171 NULL, 4172 0, 4173 NULL, 4174 0, 4175 trailing, 4176 trailingLength, 4177 minimizedLocaleID, 4178 minimizedLocaleIDCapacity, 4179 err); 4180 } 4181 } 4182 4183 /** 4184 * Next, try the language and region. 4185 **/ 4186 if (regionLength > 0) { 4187 4188 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 4189 4190 const int32_t tagBufferLength = 4191 createLikelySubtagsString( 4192 lang, 4193 langLength, 4194 NULL, 4195 0, 4196 region, 4197 regionLength, 4198 NULL, 4199 0, 4200 tagBuffer, 4201 sizeof(tagBuffer), 4202 err); 4203 4204 if(U_FAILURE(*err)) { 4205 goto error; 4206 } 4207 else if (uprv_strnicmp( 4208 maximizedTagBuffer, 4209 tagBuffer, 4210 tagBufferLength) == 0) { 4211 4212 return createTagString( 4213 lang, 4214 langLength, 4215 NULL, 4216 0, 4217 region, 4218 regionLength, 4219 trailing, 4220 trailingLength, 4221 minimizedLocaleID, 4222 minimizedLocaleIDCapacity, 4223 err); 4224 } 4225 } 4226 4227 /** 4228 * Finally, try the language and script. This is our last chance, 4229 * since trying with all three subtags would only yield the 4230 * maximal version that we already have. 
4231 **/ 4232 if (scriptLength > 0 && regionLength > 0) { 4233 char tagBuffer[ULOC_FULLNAME_CAPACITY]; 4234 4235 const int32_t tagBufferLength = 4236 createLikelySubtagsString( 4237 lang, 4238 langLength, 4239 script, 4240 scriptLength, 4241 NULL, 4242 0, 4243 NULL, 4244 0, 4245 tagBuffer, 4246 sizeof(tagBuffer), 4247 err); 4248 4249 if(U_FAILURE(*err)) { 4250 goto error; 4251 } 4252 else if (uprv_strnicmp( 4253 maximizedTagBuffer, 4254 tagBuffer, 4255 tagBufferLength) == 0) { 4256 4257 return createTagString( 4258 lang, 4259 langLength, 4260 script, 4261 scriptLength, 4262 NULL, 4263 0, 4264 trailing, 4265 trailingLength, 4266 minimizedLocaleID, 4267 minimizedLocaleIDCapacity, 4268 err); 4269 } 4270 } 4271 4272 { 4273 /** 4274 * If we got here, return the locale ID parameter. 4275 **/ 4276 const int32_t localeIDLength = uprv_strlen(localeID); 4277 4278 uprv_memcpy( 4279 minimizedLocaleID, 4280 localeID, 4281 localeIDLength <= minimizedLocaleIDCapacity ? 4282 localeIDLength : minimizedLocaleIDCapacity); 4283 4284 return u_terminateChars( 4285 minimizedLocaleID, 4286 minimizedLocaleIDCapacity, 4287 localeIDLength, 4288 err); 4289 } 4290 4291error: 4292 4293 if (!U_FAILURE(*err)) { 4294 *err = U_ILLEGAL_ARGUMENT_ERROR; 4295 } 4296 4297 return -1; 4298 4299 4300} 4301 4302static UBool 4303do_canonicalize(const char* localeID, 4304 char* buffer, 4305 int32_t bufferCapacity, 4306 UErrorCode* err) 4307{ 4308 uloc_canonicalize( 4309 localeID, 4310 buffer, 4311 bufferCapacity, 4312 err); 4313 4314 if (*err == U_STRING_NOT_TERMINATED_WARNING || 4315 *err == U_BUFFER_OVERFLOW_ERROR) { 4316 *err = U_ILLEGAL_ARGUMENT_ERROR; 4317 4318 return FALSE; 4319 } 4320 else if (U_FAILURE(*err)) { 4321 4322 return FALSE; 4323 } 4324 else { 4325 return TRUE; 4326 } 4327} 4328 4329U_DRAFT int32_t U_EXPORT2 4330uloc_addLikelySubtags(const char* localeID, 4331 char* maximizedLocaleID, 4332 int32_t maximizedLocaleIDCapacity, 4333 UErrorCode* err) 4334{ 4335 char 
localeBuffer[ULOC_FULLNAME_CAPACITY]; 4336 4337 if (!do_canonicalize( 4338 localeID, 4339 localeBuffer, 4340 sizeof(localeBuffer), 4341 err)) { 4342 return -1; 4343 } 4344 else { 4345 return _uloc_addLikelySubtags( 4346 localeBuffer, 4347 maximizedLocaleID, 4348 maximizedLocaleIDCapacity, 4349 err); 4350 } 4351} 4352 4353U_DRAFT int32_t U_EXPORT2 4354uloc_minimizeSubtags(const char* localeID, 4355 char* minimizedLocaleID, 4356 int32_t minimizedLocaleIDCapacity, 4357 UErrorCode* err) 4358{ 4359 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 4360 4361 if (!do_canonicalize( 4362 localeID, 4363 localeBuffer, 4364 sizeof(localeBuffer), 4365 err)) { 4366 return -1; 4367 } 4368 else { 4369 return _uloc_minimizeSubtags( 4370 localeBuffer, 4371 minimizedLocaleID, 4372 minimizedLocaleIDCapacity, 4373 err); 4374 } 4375} 4376 4377/*eof*/ 4378