1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4********************************************************************** 5* Copyright (C) 1997-2016, International Business Machines 6* Corporation and others. All Rights Reserved. 7********************************************************************** 8* 9* File ULOC.CPP 10* 11* Modification History: 12* 13* Date Name Description 14* 04/01/97 aliu Creation. 15* 08/21/98 stephen JDK 1.2 sync 16* 12/08/98 rtg New Locale implementation and C API 17* 03/15/99 damiba overhaul. 18* 04/06/99 stephen changed setDefault() to realloc and copy 19* 06/14/99 stephen Changed calls to ures_open for new params 20* 07/21/99 stephen Modified setDefault() to propagate to C++ 21* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, 22* brought canonicalization code into line with spec 23*****************************************************************************/ 24 25/* 26 POSIX's locale format, from putil.c: [no spaces] 27 28 ll [ _CC ] [ . MM ] [ @ VV] 29 30 l = lang, C = ctry, M = charmap, V = variant 31*/ 32 33#include "unicode/utypes.h" 34#include "unicode/ustring.h" 35#include "unicode/uloc.h" 36 37#include "putilimp.h" 38#include "ustr_imp.h" 39#include "ulocimp.h" 40#include "umutex.h" 41#include "cstring.h" 42#include "cmemory.h" 43#include "locmap.h" 44#include "uarrsort.h" 45#include "uenumimp.h" 46#include "uassert.h" 47 48#include <stdio.h> /* for sprintf */ 49 50using namespace icu; 51 52/* ### Declarations **************************************************/ 53 54/* Locale stuff from locid.cpp */ 55U_CFUNC void locale_set_default(const char *id); 56U_CFUNC const char *locale_get_default(void); 57U_CFUNC int32_t 58locale_getKeywords(const char *localeID, 59 char prev, 60 char *keywords, int32_t keywordCapacity, 61 char *values, int32_t valuesCapacity, int32_t *valLen, 62 UBool valuesToo, 63 UErrorCode *status); 64 65/* ### Data tables **************************************************/ 66 67/** 68 * Table of language codes, both 2- and 3-letter, with preference 69 * given to 2-letter codes where possible. Includes 3-letter codes 70 * that lack a 2-letter equivalent. 71 * 72 * This list must be in sorted order. This list is returned directly 73 * to the user by some API. 74 * 75 * This list must be kept in sync with LANGUAGES_3, with corresponding 76 * entries matched. 77 * 78 * This table should be terminated with a NULL entry, followed by a 79 * second list, and another NULL entry. The first list is visible to 80 * user code when this array is returned by API. The second list 81 * contains codes we support, but do not expose through user API. 82 * 83 * Notes 84 * 85 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to 86 * include the revisions up to 2001/7/27 *CWB* 87 * 88 * The 3 character codes are the terminology codes like RFC 3066. This 89 * is compatible with prior ICU codes 90 * 91 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the 92 * table but now at the end of the table because 3 character codes are 93 * duplicates. This avoids bad searches going from 3 to 2 character 94 * codes. 95 * 96 * The range qaa-qtz is reserved for local use 97 */ 98/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 99/* ISO639 table version is 20150505 */ 100static const char * const LANGUAGES[] = { 101 "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", 102 "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", 103 "aln", "alt", "am", "an", "ang", "anp", "ar", "arc", 104 "arn", "aro", "arp", "arq", "arw", "ary", "arz", "as", 105 "asa", "ase", "ast", "av", "avk", "awa", "ay", "az", 106 "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", 107 "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", 108 "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", 109 "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", 110 "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", 111 "ca", "cad", "car", "cay", "cch", "ce", "ceb", "cgg", 112 "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", 113 "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", 114 "cs", "csb", "cu", "cv", "cy", 115 "da", "dak", "dar", "dav", "de", "del", "den", "dgr", 116 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv", 117 "dyo", "dyu", "dz", "dzg", 118 "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx", 119 "en", "enm", "eo", "es", "esu", "et", "eu", "ewo", 120 "ext", 121 "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj", 122 "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", 123 "frs", "fur", "fy", 124 "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd", 125 "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom", 126 "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc", 127 "gur", "guz", "gv", "gwi", 128 "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil", 129 "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu", 130 "hup", "hy", "hz", 131 "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik", 132 "ilo", "inh", "io", "is", "it", "iu", "izh", 133 "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", 134 "jv", 135 "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", 136 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp", 137 "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk", 138 "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi", 139 "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl", 140 "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut", 141 "kv", "kw", "ky", 142 "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn", 143 "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo", 144 "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui", 145 "lun", "luo", "lus", "luy", "lv", "lzh", "lzz", 146 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", 147 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", 148 "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", 149 "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", 150 "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", 151 "my", "mye", "myv", "mzn", 152 "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne", 153 "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn", 154 "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso", 155 "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", 156 "oc", "oj", "om", "or", "os", "osa", "ota", 157 "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", 158 "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt", 159 "pon", "prg", "pro", "ps", "pt", 160 "qu", "quc", "qug", 161 "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro", 162 "rof", "rom", "rtm", "ru", "rue", "rug", "rup", 163 "rw", "rwk", 164 "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz", 165 "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh", 166 "se", "see", "seh", "sei", "sel", "ses", "sg", "sga", 167 "sgs", "shi", "shn", "shu", "si", "sid", "sk", 168 "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", 169 "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", 170 "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux", 171 "sv", "sw", "swb", "swc", "syc", "syr", "szl", 172 "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", 173 "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", 174 "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", 175 "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", 176 "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", 177 "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", 178 "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo", 179 "vot", "vro", "vun", 180 "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu", 181 "xal", "xh", "xmf", "xog", 182 "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue", 183 "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", 184 "zun", "zxx", "zza", 185NULL, 186 "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ 187NULL 188}; 189 190static const char* const DEPRECATED_LANGUAGES[]={ 191 "in", "iw", "ji", "jw", NULL, NULL 192}; 193static const char* const REPLACEMENT_LANGUAGES[]={ 194 "id", "he", "yi", "jv", NULL, NULL 195}; 196 197/** 198 * Table of 3-letter language codes. 199 * 200 * This is a lookup table used to convert 3-letter language codes to 201 * their 2-letter equivalent, where possible. It must be kept in sync 202 * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the 203 * same language as LANGUAGES_3[i]. The commented-out lines are 204 * copied from LANGUAGES to make eyeballing this baby easier. 205 * 206 * Where a 3-letter language code has no 2-letter equivalent, the 207 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. 208 * 209 * This table should be terminated with a NULL entry, followed by a 210 * second list, and another NULL entry. The two lists correspond to 211 * the two lists in LANGUAGES. 212 */ 213/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ 214/* ISO639 table version is 20150505 */ 215static const char * const LANGUAGES_3[] = { 216 "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", 217 "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", 218 "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc", 219 "arn", "aro", "arp", "arq", "arw", "ary", "arz", "asm", 220 "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze", 221 "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", 222 "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", 223 "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", 224 "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", 225 "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", 226 "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg", 227 "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", 228 "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", 229 "ces", "csb", "chu", "chv", "cym", 230 "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr", 231 "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div", 232 "dyo", "dyu", "dzo", "dzg", 233 "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx", 234 "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo", 235 "ext", 236 "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij", 237 "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr", 238 "frs", "fur", "fry", 239 "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla", 240 "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom", 241 "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc", 242 "gur", "guz", "glv", "gwi", 243 "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil", 244 "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", 245 "hup", "hye", "her", 246 "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk", 247 "ilo", "inh", "ido", "isl", "ita", "iku", "izh", 248 "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", 249 "jav", 250 "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", 251 "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp", 252 "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz", 253 "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi", 254 "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl", 255 "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut", 256 "kom", "cor", "kir", 257 "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn", 258 "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao", 259 "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui", 260 "lun", "luo", "lus", "luy", "lav", "lzh", "lzz", 261 "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", 262 "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", 263 "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", 264 "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", 265 "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", 266 "mya", "mye", "myv", "mzn", 267 "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep", 268 "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno", 269 "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso", 270 "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", 271 "oci", "oji", "orm", "ori", "oss", "osa", "ota", 272 "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc", 273 "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt", 274 "pon", "prg", "pro", "pus", "por", 275 "que", "quc", "qug", 276 "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron", 277 "rof", "rom", "rtm", "rus", "rue", "rug", "rup", 278 "kin", "rwk", 279 "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz", 280 "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh", 281 "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga", 282 "sgs", "shi", "shn", "shu", "sin", "sid", "slk", 283 "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", 284 "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", 285 "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", 286 "swe", "swa", "swb", "swc", "syc", "syr", "szl", 287 "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", 288 "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl", 289 "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", 290 "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", 291 "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", 292 "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", 293 "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol", 294 "vot", "vro", "vun", 295 "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu", 296 "xal", "xho", "xmf", "xog", 297 "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue", 298 "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", 299 "zun", "zxx", "zza", 300NULL, 301/* "in", "iw", "ji", "jw", "sh", */ 302 "ind", "heb", "yid", "jaw", "srp", 303NULL 304}; 305 306/** 307 * Table of 2-letter country codes. 308 * 309 * This list must be in sorted order. This list is returned directly 310 * to the user by some API. 311 * 312 * This list must be kept in sync with COUNTRIES_3, with corresponding 313 * entries matched. 314 * 315 * This table should be terminated with a NULL entry, followed by a 316 * second list, and another NULL entry. The first list is visible to 317 * user code when this array is returned by API. The second list 318 * contains codes we support, but do not expose through user API. 319 * 320 * Notes: 321 * 322 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per 323 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added 324 * new codes keeping the old ones for compatibility updated to include 325 * 1999/12/03 revisions *CWB* 326 * 327 * RO(ROM) is now RO(ROU) according to 328 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html 329 */ 330static const char * const COUNTRIES[] = { 331 "AD", "AE", "AF", "AG", "AI", "AL", "AM", 332 "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", 333 "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", 334 "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", 335 "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", 336 "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", 337 "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", 338 "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", 339 "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", 340 "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", 341 "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", 342 "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", 343 "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", 344 "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", 345 "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", 346 "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", 347 "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", 348 "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", 349 "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", 350 "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", 351 "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", 352 "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", 353 "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", 354 "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", 355 "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", 356 "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", 357 "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", 358 "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", 359 "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", 360 "WS", "YE", "YT", "ZA", "ZM", "ZW", 361NULL, 362 "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ 363NULL 364}; 365 366static const char* const DEPRECATED_COUNTRIES[] = { 367 "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ 368}; 369static const char* const REPLACEMENT_COUNTRIES[] = { 370/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ 371 "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ 372}; 373 374/** 375 * Table of 3-letter country codes. 376 * 377 * This is a lookup table used to convert 3-letter country codes to 378 * their 2-letter equivalent. It must be kept in sync with COUNTRIES. 379 * For all valid i, COUNTRIES[i] must refer to the same country as 380 * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES 381 * to make eyeballing this baby easier. 382 * 383 * This table should be terminated with a NULL entry, followed by a 384 * second list, and another NULL entry. The two lists correspond to 385 * the two lists in COUNTRIES. 386 */ 387static const char * const COUNTRIES_3[] = { 388/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ 389 "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", 390/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ 391 "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", 392/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ 393 "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", 394/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ 395 "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", 396/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ 397 "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", 398/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ 399 "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", 400/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ 401 "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", 402/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ 403 "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", 404/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ 405 "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", 406/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ 407 "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", 408/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ 409 "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", 410/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ 411 "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", 412/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ 413 "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", 414/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ 415 "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", 416/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ 417 "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", 418/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ 419 "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", 420/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ 421 "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", 422/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ 423 "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", 424/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ 425 "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", 426/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ 427 "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", 428/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ 429 "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", 430/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ 431 "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", 432/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ 433 "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", 434/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ 435 "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", 436/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ 437 "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", 438/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ 439 "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", 440/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ 441 "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", 442/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ 443 "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", 444/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ 445 "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", 446/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ 447 "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", 448NULL, 449/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ 450 "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", 451NULL 452}; 453 454typedef struct CanonicalizationMap { 455 const char *id; /* input ID */ 456 const char *canonicalID; /* canonicalized output ID */ 457 const char *keyword; /* keyword, or NULL if none */ 458 const char *value; /* keyword value, or NULL if kw==NULL */ 459} CanonicalizationMap; 460 461/** 462 * A map to canonicalize locale IDs. This handles a variety of 463 * different semantic kinds of transformations. 464 */ 465static const CanonicalizationMap CANONICALIZE_MAP[] = { 466 { "", "en_US_POSIX", NULL, NULL }, /* .NET name */ 467 { "c", "en_US_POSIX", NULL, NULL }, /* POSIX name */ 468 { "posix", "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */ 469 { "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */ 470 { "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */ 471 { "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */ 472 { "ca_ES_PREEURO", "ca_ES", "currency", "ESP" }, 473 { "de__PHONEBOOK", "de", "collation", "phonebook" }, /* Old ICU name */ 474 { "de_AT_PREEURO", "de_AT", "currency", "ATS" }, 475 { "de_DE_PREEURO", "de_DE", "currency", "DEM" }, 476 { "de_LU_PREEURO", "de_LU", "currency", "LUF" }, 477 { "el_GR_PREEURO", "el_GR", "currency", "GRD" }, 478 { "en_BE_PREEURO", "en_BE", "currency", "BEF" }, 479 { "en_IE_PREEURO", "en_IE", "currency", "IEP" }, 480 { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */ 481 { "es_ES_PREEURO", "es_ES", "currency", "ESP" }, 482 { "eu_ES_PREEURO", "eu_ES", "currency", "ESP" }, 483 { "fi_FI_PREEURO", "fi_FI", "currency", "FIM" }, 484 { "fr_BE_PREEURO", "fr_BE", "currency", "BEF" }, 485 { "fr_FR_PREEURO", "fr_FR", "currency", "FRF" }, 486 { "fr_LU_PREEURO", "fr_LU", "currency", "LUF" }, 487 { "ga_IE_PREEURO", "ga_IE", "currency", "IEP" }, 488 { "gl_ES_PREEURO", "gl_ES", "currency", "ESP" }, 489 { "hi__DIRECT", "hi", "collation", "direct" }, /* Old ICU name */ 490 { "it_IT_PREEURO", "it_IT", "currency", "ITL" }, 491 { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */ 492 { "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */ 493 { "nl_BE_PREEURO", "nl_BE", "currency", "BEF" }, 494 { "nl_NL_PREEURO", "nl_NL", "currency", "NLG" }, 495 { "pt_PT_PREEURO", "pt_PT", "currency", "PTE" }, 496 { "sr_SP_CYRL", "sr_Cyrl_RS", NULL, NULL }, /* .NET name */ 497 { "sr_SP_LATN", "sr_Latn_RS", NULL, NULL }, /* .NET name */ 498 { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */ 499 { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */ 500 { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */ 501 { "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */ 502 { "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */ 503 { "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */ 504 { "zh_CHT", "zh_Hant", NULL, NULL }, /* .NET name */ 505 { "zh_GAN", "gan", NULL, NULL }, /* registered name */ 506 { "zh_GUOYU", "zh", NULL, NULL }, /* registered name */ 507 { "zh_HAKKA", "hak", NULL, NULL }, /* registered name */ 508 { "zh_MIN_NAN", "nan", NULL, NULL }, /* registered name */ 509 { "zh_WUU", "wuu", NULL, NULL }, /* registered name */ 510 { "zh_XIANG", "hsn", NULL, NULL }, /* registered name */ 511 { "zh_YUE", "yue", NULL, NULL }, /* registered name */ 512}; 513 514typedef struct VariantMap { 515 const char *variant; /* input ID */ 516 const char *keyword; /* keyword, or NULL if none */ 517 const char *value; /* keyword value, or NULL if kw==NULL */ 518} VariantMap; 519 520static const VariantMap VARIANT_MAP[] = { 521 { "EURO", "currency", "EUR" }, 522 { "PINYIN", "collation", "pinyin" }, /* Solaris variant */ 523 { "STROKE", "collation", "stroke" } /* Solaris variant */ 524}; 525 526/* ### BCP47 Conversion *******************************************/ 527/* Test if the locale id has BCP47 u extension and does not have '@' */ 528#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) 529/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ 530#define _ConvertBCP47(finalID, id, buffer, length,err) \ 531 if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \ 532 finalID=id; \ 533 } else { \ 534 finalID=buffer; \ 535 } 536/* Gets the size of the shortest subtag in the given localeID. */ 537static int32_t getShortestSubtagLength(const char *localeID) { 538 int32_t localeIDLength = uprv_strlen(localeID); 539 int32_t length = localeIDLength; 540 int32_t tmpLength = 0; 541 int32_t i; 542 UBool reset = TRUE; 543 544 for (i = 0; i < localeIDLength; i++) { 545 if (localeID[i] != '_' && localeID[i] != '-') { 546 if (reset) { 547 tmpLength = 0; 548 reset = FALSE; 549 } 550 tmpLength++; 551 } else { 552 if (tmpLength != 0 && tmpLength < length) { 553 length = tmpLength; 554 } 555 reset = TRUE; 556 } 557 } 558 559 return length; 560} 561 562/* ### Keywords **************************************************/ 563 564#define ULOC_KEYWORD_BUFFER_LEN 25 565#define ULOC_MAX_NO_KEYWORDS 25 566 567U_CAPI const char * U_EXPORT2 568locale_getKeywordsStart(const char *localeID) { 569 const char *result = NULL; 570 if((result = uprv_strchr(localeID, '@')) != NULL) { 571 return result; 572 } 573#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) 574 else { 575 /* We do this because the @ sign is variant, and the @ sign used on one 576 EBCDIC machine won't be compiled the same way on other EBCDIC based 577 machines. */ 578 static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; 579 const uint8_t *charToFind = ebcdicSigns; 580 while(*charToFind) { 581 if((result = uprv_strchr(localeID, *charToFind)) != NULL) { 582 return result; 583 } 584 charToFind++; 585 } 586 } 587#endif 588 return NULL; 589} 590 591/** 592 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] 593 * @param keywordName incoming name to be canonicalized 594 * @param status return status (keyword too long) 595 * @return length of the keyword name 596 */ 597static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) 598{ 599 int32_t i; 600 int32_t keywordNameLen = (int32_t)uprv_strlen(keywordName); 601 602 if(keywordNameLen >= ULOC_KEYWORD_BUFFER_LEN) { 603 /* keyword name too long for internal buffer */ 604 *status = U_INTERNAL_PROGRAM_ERROR; 605 return 0; 606 } 607 608 /* normalize the keyword name */ 609 for(i = 0; i < keywordNameLen; i++) { 610 buf[i] = uprv_tolower(keywordName[i]); 611 } 612 buf[i] = 0; 613 614 return keywordNameLen; 615} 616 617typedef struct { 618 char keyword[ULOC_KEYWORD_BUFFER_LEN]; 619 int32_t keywordLen; 620 const char *valueStart; 621 int32_t valueLen; 622} KeywordStruct; 623 624static int32_t U_CALLCONV 625compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { 626 const char* leftString = ((const KeywordStruct *)left)->keyword; 627 const char* rightString = ((const KeywordStruct *)right)->keyword; 628 return uprv_strcmp(leftString, rightString); 629} 630 631/** 632 * Both addKeyword and addValue must already be in canonical form. 633 * Either both addKeyword and addValue are NULL, or neither is NULL. 634 * If they are not NULL they must be zero terminated. 635 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword. 636 */ 637static int32_t 638_getKeywords(const char *localeID, 639 char prev, 640 char *keywords, int32_t keywordCapacity, 641 char *values, int32_t valuesCapacity, int32_t *valLen, 642 UBool valuesToo, 643 const char* addKeyword, 644 const char* addValue, 645 UErrorCode *status) 646{ 647 KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; 648 649 int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; 650 int32_t numKeywords = 0; 651 const char* pos = localeID; 652 const char* equalSign = NULL; 653 const char* semicolon = NULL; 654 int32_t i = 0, j, n; 655 int32_t keywordsLen = 0; 656 int32_t valuesLen = 0; 657 658 if(prev == '@') { /* start of keyword definition */ 659 /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ 660 do { 661 UBool duplicate = FALSE; 662 /* skip leading spaces */ 663 while(*pos == ' ') { 664 pos++; 665 } 666 if (!*pos) { /* handle trailing "; " */ 667 break; 668 } 669 if(numKeywords == maxKeywords) { 670 *status = U_INTERNAL_PROGRAM_ERROR; 671 return 0; 672 } 673 equalSign = uprv_strchr(pos, '='); 674 semicolon = uprv_strchr(pos, ';'); 675 /* lack of '=' [foo@currency] is illegal */ 676 /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ 677 if(!equalSign || (semicolon && semicolon<equalSign)) { 678 *status = U_INVALID_FORMAT_ERROR; 679 return 0; 680 } 681 /* need to normalize both keyword and keyword name */ 682 if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { 683 /* keyword name too long for internal buffer */ 684 *status = U_INTERNAL_PROGRAM_ERROR; 685 return 0; 686 } 687 for(i = 0, n = 0; i < equalSign - pos; ++i) { 688 if (pos[i] != ' ') { 689 keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); 690 } 691 } 692 693 /* zero-length keyword is an error. */ 694 if (n == 0) { 695 *status = U_INVALID_FORMAT_ERROR; 696 return 0; 697 } 698 699 keywordList[numKeywords].keyword[n] = 0; 700 keywordList[numKeywords].keywordLen = n; 701 /* now grab the value part. First we skip the '=' */ 702 equalSign++; 703 /* then we leading spaces */ 704 while(*equalSign == ' ') { 705 equalSign++; 706 } 707 708 /* Premature end or zero-length value */ 709 if (!*equalSign || equalSign == semicolon) { 710 *status = U_INVALID_FORMAT_ERROR; 711 return 0; 712 } 713 714 keywordList[numKeywords].valueStart = equalSign; 715 716 pos = semicolon; 717 i = 0; 718 if(pos) { 719 while(*(pos - i - 1) == ' ') { 720 i++; 721 } 722 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); 723 pos++; 724 } else { 725 i = (int32_t)uprv_strlen(equalSign); 726 while(i && equalSign[i-1] == ' ') { 727 i--; 728 } 729 keywordList[numKeywords].valueLen = i; 730 } 731 /* If this is a duplicate keyword, then ignore it */ 732 for (j=0; j<numKeywords; ++j) { 733 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { 734 duplicate = TRUE; 735 break; 736 } 737 } 738 if (!duplicate) { 739 ++numKeywords; 740 } 741 } while(pos); 742 743 /* Handle addKeyword/addValue. */ 744 if (addKeyword != NULL) { 745 UBool duplicate = FALSE; 746 U_ASSERT(addValue != NULL); 747 /* Search for duplicate; if found, do nothing. Explicit keyword 748 overrides addKeyword. */ 749 for (j=0; j<numKeywords; ++j) { 750 if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) { 751 duplicate = TRUE; 752 break; 753 } 754 } 755 if (!duplicate) { 756 if (numKeywords == maxKeywords) { 757 *status = U_INTERNAL_PROGRAM_ERROR; 758 return 0; 759 } 760 uprv_strcpy(keywordList[numKeywords].keyword, addKeyword); 761 keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword); 762 keywordList[numKeywords].valueStart = addValue; 763 keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue); 764 ++numKeywords; 765 } 766 } else { 767 U_ASSERT(addValue == NULL); 768 } 769 770 /* now we have a list of keywords */ 771 /* we need to sort it */ 772 uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); 773 774 /* Now construct the keyword part */ 775 for(i = 0; i < numKeywords; i++) { 776 if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { 777 uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); 778 if(valuesToo) { 779 keywords[keywordsLen + keywordList[i].keywordLen] = '='; 780 } else { 781 keywords[keywordsLen + keywordList[i].keywordLen] = 0; 782 } 783 } 784 keywordsLen += keywordList[i].keywordLen + 1; 785 if(valuesToo) { 786 if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { 787 uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); 788 } 789 keywordsLen += keywordList[i].valueLen; 790 791 if(i < numKeywords - 1) { 792 if(keywordsLen < keywordCapacity) { 793 keywords[keywordsLen] = ';'; 794 } 795 keywordsLen++; 796 } 797 } 798 if(values) { 799 if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { 800 uprv_strcpy(values+valuesLen, keywordList[i].valueStart); 801 values[valuesLen + keywordList[i].valueLen] = 0; 802 } 803 valuesLen += keywordList[i].valueLen + 1; 804 } 805 } 806 if(values) { 807 values[valuesLen] = 0; 808 if(valLen) { 809 *valLen = valuesLen; 810 } 811 } 812 return u_terminateChars(keywords, keywordCapacity, keywordsLen, status); 813 } else { 814 return 0; 815 } 816} 817 818U_CFUNC int32_t 819locale_getKeywords(const char *localeID, 820 char prev, 821 char *keywords, int32_t keywordCapacity, 822 char *values, int32_t valuesCapacity, int32_t *valLen, 823 UBool valuesToo, 824 UErrorCode *status) { 825 return _getKeywords(localeID, prev, keywords, keywordCapacity, 826 values, valuesCapacity, valLen, valuesToo, 827 NULL, NULL, status); 828} 829 830U_CAPI int32_t U_EXPORT2 831uloc_getKeywordValue(const char* localeID, 832 const char* keywordName, 833 char* buffer, int32_t bufferCapacity, 834 UErrorCode* status) 835{ 836 const char* startSearchHere = NULL; 837 const char* nextSeparator = NULL; 838 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 839 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 840 int32_t i = 0; 841 int32_t result = 0; 842 843 if(status && U_SUCCESS(*status) && localeID) { 844 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 845 const char* tmpLocaleID; 846 847 if (_hasBCP47Extension(localeID)) { 848 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 849 } else { 850 tmpLocaleID=localeID; 851 } 852 853 startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */ 854 if(startSearchHere == NULL) { 855 /* no keywords, return at once */ 856 return 0; 857 } 858 859 locale_canonKeywordName(keywordNameBuffer, keywordName, status); 860 if(U_FAILURE(*status)) { 861 return 0; 862 } 863 864 /* find the first keyword */ 865 while(startSearchHere) { 866 startSearchHere++; 867 /* skip leading spaces (allowed?) */ 868 while(*startSearchHere == ' ') { 869 startSearchHere++; 870 } 871 nextSeparator = uprv_strchr(startSearchHere, '='); 872 /* need to normalize both keyword and keyword name */ 873 if(!nextSeparator) { 874 break; 875 } 876 if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) { 877 /* keyword name too long for internal buffer */ 878 *status = U_INTERNAL_PROGRAM_ERROR; 879 return 0; 880 } 881 for(i = 0; i < nextSeparator - startSearchHere; i++) { 882 localeKeywordNameBuffer[i] = uprv_tolower(startSearchHere[i]); 883 } 884 /* trim trailing spaces */ 885 while(startSearchHere[i-1] == ' ') { 886 i--; 887 U_ASSERT(i>=0); 888 } 889 localeKeywordNameBuffer[i] = 0; 890 891 startSearchHere = uprv_strchr(nextSeparator, ';'); 892 893 if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { 894 nextSeparator++; 895 while(*nextSeparator == ' ') { 896 nextSeparator++; 897 } 898 /* we actually found the keyword. Copy the value */ 899 if(startSearchHere && startSearchHere - nextSeparator < bufferCapacity) { 900 while(*(startSearchHere-1) == ' ') { 901 startSearchHere--; 902 } 903 uprv_strncpy(buffer, nextSeparator, startSearchHere - nextSeparator); 904 result = u_terminateChars(buffer, bufferCapacity, (int32_t)(startSearchHere - nextSeparator), status); 905 } else if(!startSearchHere && (int32_t)uprv_strlen(nextSeparator) < bufferCapacity) { /* last item in string */ 906 i = (int32_t)uprv_strlen(nextSeparator); 907 while(nextSeparator[i - 1] == ' ') { 908 i--; 909 } 910 uprv_strncpy(buffer, nextSeparator, i); 911 result = u_terminateChars(buffer, bufferCapacity, i, status); 912 } else { 913 /* give a bigger buffer, please */ 914 *status = U_BUFFER_OVERFLOW_ERROR; 915 if(startSearchHere) { 916 result = (int32_t)(startSearchHere - nextSeparator); 917 } else { 918 result = (int32_t)uprv_strlen(nextSeparator); 919 } 920 } 921 return result; 922 } 923 } 924 } 925 return 0; 926} 927 928U_CAPI int32_t U_EXPORT2 929uloc_setKeywordValue(const char* keywordName, 930 const char* keywordValue, 931 char* buffer, int32_t bufferCapacity, 932 UErrorCode* status) 933{ 934 /* TODO: sorting. removal. */ 935 int32_t keywordNameLen; 936 int32_t keywordValueLen; 937 int32_t bufLen; 938 int32_t needLen = 0; 939 int32_t foundValueLen; 940 int32_t keywordAtEnd = 0; /* is the keyword at the end of the string? */ 941 char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 942 char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; 943 int32_t i = 0; 944 int32_t rc; 945 char* nextSeparator = NULL; 946 char* nextEqualsign = NULL; 947 char* startSearchHere = NULL; 948 char* keywordStart = NULL; 949 char *insertHere = NULL; 950 if(U_FAILURE(*status)) { 951 return -1; 952 } 953 if(bufferCapacity>1) { 954 bufLen = (int32_t)uprv_strlen(buffer); 955 } else { 956 *status = U_ILLEGAL_ARGUMENT_ERROR; 957 return 0; 958 } 959 if(bufferCapacity<bufLen) { 960 /* The capacity is less than the length?! Is this NULL terminated? */ 961 *status = U_ILLEGAL_ARGUMENT_ERROR; 962 return 0; 963 } 964 if(keywordValue && !*keywordValue) { 965 keywordValue = NULL; 966 } 967 if(keywordValue) { 968 keywordValueLen = (int32_t)uprv_strlen(keywordValue); 969 } else { 970 keywordValueLen = 0; 971 } 972 keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); 973 if(U_FAILURE(*status)) { 974 return 0; 975 } 976 startSearchHere = (char*)locale_getKeywordsStart(buffer); 977 if(startSearchHere == NULL || (startSearchHere[1]==0)) { 978 if(!keywordValue) { /* no keywords = nothing to remove */ 979 return bufLen; 980 } 981 982 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 983 if(startSearchHere) { /* had a single @ */ 984 needLen--; /* already had the @ */ 985 /* startSearchHere points at the @ */ 986 } else { 987 startSearchHere=buffer+bufLen; 988 } 989 if(needLen >= bufferCapacity) { 990 *status = U_BUFFER_OVERFLOW_ERROR; 991 return needLen; /* no change */ 992 } 993 *startSearchHere = '@'; 994 startSearchHere++; 995 uprv_strcpy(startSearchHere, keywordNameBuffer); 996 startSearchHere += keywordNameLen; 997 *startSearchHere = '='; 998 startSearchHere++; 999 uprv_strcpy(startSearchHere, keywordValue); 1000 startSearchHere+=keywordValueLen; 1001 return needLen; 1002 } /* end shortcut - no @ */ 1003 1004 keywordStart = startSearchHere; 1005 /* search for keyword */ 1006 while(keywordStart) { 1007 keywordStart++; 1008 /* skip leading spaces (allowed?) */ 1009 while(*keywordStart == ' ') { 1010 keywordStart++; 1011 } 1012 nextEqualsign = uprv_strchr(keywordStart, '='); 1013 /* need to normalize both keyword and keyword name */ 1014 if(!nextEqualsign) { 1015 break; 1016 } 1017 if(nextEqualsign - keywordStart >= ULOC_KEYWORD_BUFFER_LEN) { 1018 /* keyword name too long for internal buffer */ 1019 *status = U_INTERNAL_PROGRAM_ERROR; 1020 return 0; 1021 } 1022 for(i = 0; i < nextEqualsign - keywordStart; i++) { 1023 localeKeywordNameBuffer[i] = uprv_tolower(keywordStart[i]); 1024 } 1025 /* trim trailing spaces */ 1026 while(keywordStart[i-1] == ' ') { 1027 i--; 1028 } 1029 U_ASSERT(i>=0 && i<ULOC_KEYWORD_BUFFER_LEN); 1030 localeKeywordNameBuffer[i] = 0; 1031 1032 nextSeparator = uprv_strchr(nextEqualsign, ';'); 1033 rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); 1034 if(rc == 0) { 1035 nextEqualsign++; 1036 while(*nextEqualsign == ' ') { 1037 nextEqualsign++; 1038 } 1039 /* we actually found the keyword. Change the value */ 1040 if (nextSeparator) { 1041 keywordAtEnd = 0; 1042 foundValueLen = (int32_t)(nextSeparator - nextEqualsign); 1043 } else { 1044 keywordAtEnd = 1; 1045 foundValueLen = (int32_t)uprv_strlen(nextEqualsign); 1046 } 1047 if(keywordValue) { /* adding a value - not removing */ 1048 if(foundValueLen == keywordValueLen) { 1049 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1050 return bufLen; /* no change in size */ 1051 } else if(foundValueLen > keywordValueLen) { 1052 int32_t delta = foundValueLen - keywordValueLen; 1053 if(nextSeparator) { /* RH side */ 1054 uprv_memmove(nextSeparator - delta, nextSeparator, bufLen-(nextSeparator-buffer)); 1055 } 1056 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1057 bufLen -= delta; 1058 buffer[bufLen]=0; 1059 return bufLen; 1060 } else { /* FVL < KVL */ 1061 int32_t delta = keywordValueLen - foundValueLen; 1062 if((bufLen+delta) >= bufferCapacity) { 1063 *status = U_BUFFER_OVERFLOW_ERROR; 1064 return bufLen+delta; 1065 } 1066 if(nextSeparator) { /* RH side */ 1067 uprv_memmove(nextSeparator+delta,nextSeparator, bufLen-(nextSeparator-buffer)); 1068 } 1069 uprv_strncpy(nextEqualsign, keywordValue, keywordValueLen); 1070 bufLen += delta; 1071 buffer[bufLen]=0; 1072 return bufLen; 1073 } 1074 } else { /* removing a keyword */ 1075 if(keywordAtEnd) { 1076 /* zero out the ';' or '@' just before startSearchhere */ 1077 keywordStart[-1] = 0; 1078 return (int32_t)((keywordStart-buffer)-1); /* (string length without keyword) minus separator */ 1079 } else { 1080 uprv_memmove(keywordStart, nextSeparator+1, bufLen-((nextSeparator+1)-buffer)); 1081 keywordStart[bufLen-((nextSeparator+1)-buffer)]=0; 1082 return (int32_t)(bufLen-((nextSeparator+1)-keywordStart)); 1083 } 1084 } 1085 } else if(rc<0){ /* end match keyword */ 1086 /* could insert at this location. */ 1087 insertHere = keywordStart; 1088 } 1089 keywordStart = nextSeparator; 1090 } /* end loop searching */ 1091 1092 if(!keywordValue) { 1093 return bufLen; /* removal of non-extant keyword - no change */ 1094 } 1095 1096 /* we know there is at least one keyword. */ 1097 needLen = bufLen+1+keywordNameLen+1+keywordValueLen; 1098 if(needLen >= bufferCapacity) { 1099 *status = U_BUFFER_OVERFLOW_ERROR; 1100 return needLen; /* no change */ 1101 } 1102 1103 if(insertHere) { 1104 uprv_memmove(insertHere+(1+keywordNameLen+1+keywordValueLen), insertHere, bufLen-(insertHere-buffer)); 1105 keywordStart = insertHere; 1106 } else { 1107 keywordStart = buffer+bufLen; 1108 *keywordStart = ';'; 1109 keywordStart++; 1110 } 1111 uprv_strncpy(keywordStart, keywordNameBuffer, keywordNameLen); 1112 keywordStart += keywordNameLen; 1113 *keywordStart = '='; 1114 keywordStart++; 1115 uprv_strncpy(keywordStart, keywordValue, keywordValueLen); /* terminates. */ 1116 keywordStart+=keywordValueLen; 1117 if(insertHere) { 1118 *keywordStart = ';'; 1119 keywordStart++; 1120 } 1121 buffer[needLen]=0; 1122 return needLen; 1123} 1124 1125/* ### ID parsing implementation **************************************************/ 1126 1127#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) 1128 1129/*returns TRUE if one of the special prefixes is here (s=string) 1130 'x-' or 'i-' */ 1131#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) 1132 1133/* Dot terminates it because of POSIX form where dot precedes the codepage 1134 * except for variant 1135 */ 1136#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) 1137 1138static char* _strnchr(const char* str, int32_t len, char c) { 1139 U_ASSERT(str != 0 && len >= 0); 1140 while (len-- != 0) { 1141 char d = *str; 1142 if (d == c) { 1143 return (char*) str; 1144 } else if (d == 0) { 1145 break; 1146 } 1147 ++str; 1148 } 1149 return NULL; 1150} 1151 1152/** 1153 * Lookup 'key' in the array 'list'. The array 'list' should contain 1154 * a NULL entry, followed by more entries, and a second NULL entry. 1155 * 1156 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or 1157 * COUNTRIES_3. 1158 */ 1159static int16_t _findIndex(const char* const* list, const char* key) 1160{ 1161 const char* const* anchor = list; 1162 int32_t pass = 0; 1163 1164 /* Make two passes through two NULL-terminated arrays at 'list' */ 1165 while (pass++ < 2) { 1166 while (*list) { 1167 if (uprv_strcmp(key, *list) == 0) { 1168 return (int16_t)(list - anchor); 1169 } 1170 list++; 1171 } 1172 ++list; /* skip final NULL *CWB*/ 1173 } 1174 return -1; 1175} 1176 1177/* count the length of src while copying it to dest; return strlen(src) */ 1178static inline int32_t 1179_copyCount(char *dest, int32_t destCapacity, const char *src) { 1180 const char *anchor; 1181 char c; 1182 1183 anchor=src; 1184 for(;;) { 1185 if((c=*src)==0) { 1186 return (int32_t)(src-anchor); 1187 } 1188 if(destCapacity<=0) { 1189 return (int32_t)((src-anchor)+uprv_strlen(src)); 1190 } 1191 ++src; 1192 *dest++=c; 1193 --destCapacity; 1194 } 1195} 1196 1197U_CFUNC const char* 1198uloc_getCurrentCountryID(const char* oldID){ 1199 int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); 1200 if (offset >= 0) { 1201 return REPLACEMENT_COUNTRIES[offset]; 1202 } 1203 return oldID; 1204} 1205U_CFUNC const char* 1206uloc_getCurrentLanguageID(const char* oldID){ 1207 int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); 1208 if (offset >= 0) { 1209 return REPLACEMENT_LANGUAGES[offset]; 1210 } 1211 return oldID; 1212} 1213/* 1214 * the internal functions _getLanguage(), _getCountry(), _getVariant() 1215 * avoid duplicating code to handle the earlier locale ID pieces 1216 * in the functions for the later ones by 1217 * setting the *pEnd pointer to where they stopped parsing 1218 * 1219 * TODO try to use this in Locale 1220 */ 1221U_CFUNC int32_t 1222ulocimp_getLanguage(const char *localeID, 1223 char *language, int32_t languageCapacity, 1224 const char **pEnd) { 1225 int32_t i=0; 1226 int32_t offset; 1227 char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ 1228 1229 /* if it starts with i- or x- then copy that prefix */ 1230 if(_isIDPrefix(localeID)) { 1231 if(i<languageCapacity) { 1232 language[i]=(char)uprv_tolower(*localeID); 1233 } 1234 if(i<languageCapacity) { 1235 language[i+1]='-'; 1236 } 1237 i+=2; 1238 localeID+=2; 1239 } 1240 1241 /* copy the language as far as possible and count its length */ 1242 while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { 1243 if(i<languageCapacity) { 1244 language[i]=(char)uprv_tolower(*localeID); 1245 } 1246 if(i<3) { 1247 U_ASSERT(i>=0); 1248 lang[i]=(char)uprv_tolower(*localeID); 1249 } 1250 i++; 1251 localeID++; 1252 } 1253 1254 if(i==3) { 1255 /* convert 3 character code to 2 character code if possible *CWB*/ 1256 offset=_findIndex(LANGUAGES_3, lang); 1257 if(offset>=0) { 1258 i=_copyCount(language, languageCapacity, LANGUAGES[offset]); 1259 } 1260 } 1261 1262 if(pEnd!=NULL) { 1263 *pEnd=localeID; 1264 } 1265 return i; 1266} 1267 1268U_CFUNC int32_t 1269ulocimp_getScript(const char *localeID, 1270 char *script, int32_t scriptCapacity, 1271 const char **pEnd) 1272{ 1273 int32_t idLen = 0; 1274 1275 if (pEnd != NULL) { 1276 *pEnd = localeID; 1277 } 1278 1279 /* copy the second item as far as possible and count its length */ 1280 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) 1281 && uprv_isASCIILetter(localeID[idLen])) { 1282 idLen++; 1283 } 1284 1285 /* If it's exactly 4 characters long, then it's a script and not a country. */ 1286 if (idLen == 4) { 1287 int32_t i; 1288 if (pEnd != NULL) { 1289 *pEnd = localeID+idLen; 1290 } 1291 if(idLen > scriptCapacity) { 1292 idLen = scriptCapacity; 1293 } 1294 if (idLen >= 1) { 1295 script[0]=(char)uprv_toupper(*(localeID++)); 1296 } 1297 for (i = 1; i < idLen; i++) { 1298 script[i]=(char)uprv_tolower(*(localeID++)); 1299 } 1300 } 1301 else { 1302 idLen = 0; 1303 } 1304 return idLen; 1305} 1306 1307U_CFUNC int32_t 1308ulocimp_getCountry(const char *localeID, 1309 char *country, int32_t countryCapacity, 1310 const char **pEnd) 1311{ 1312 int32_t idLen=0; 1313 char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; 1314 int32_t offset; 1315 1316 /* copy the country as far as possible and count its length */ 1317 while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { 1318 if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ 1319 cnty[idLen]=(char)uprv_toupper(localeID[idLen]); 1320 } 1321 idLen++; 1322 } 1323 1324 /* the country should be either length 2 or 3 */ 1325 if (idLen == 2 || idLen == 3) { 1326 UBool gotCountry = FALSE; 1327 /* convert 3 character code to 2 character code if possible *CWB*/ 1328 if(idLen==3) { 1329 offset=_findIndex(COUNTRIES_3, cnty); 1330 if(offset>=0) { 1331 idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); 1332 gotCountry = TRUE; 1333 } 1334 } 1335 if (!gotCountry) { 1336 int32_t i = 0; 1337 for (i = 0; i < idLen; i++) { 1338 if (i < countryCapacity) { 1339 country[i]=(char)uprv_toupper(localeID[i]); 1340 } 1341 } 1342 } 1343 localeID+=idLen; 1344 } else { 1345 idLen = 0; 1346 } 1347 1348 if(pEnd!=NULL) { 1349 *pEnd=localeID; 1350 } 1351 1352 return idLen; 1353} 1354 1355/** 1356 * @param needSeparator if true, then add leading '_' if any variants 1357 * are added to 'variant' 1358 */ 1359static int32_t 1360_getVariantEx(const char *localeID, 1361 char prev, 1362 char *variant, int32_t variantCapacity, 1363 UBool needSeparator) { 1364 int32_t i=0; 1365 1366 /* get one or more variant tags and separate them with '_' */ 1367 if(_isIDSeparator(prev)) { 1368 /* get a variant string after a '-' or '_' */ 1369 while(!_isTerminator(*localeID)) { 1370 if (needSeparator) { 1371 if (i<variantCapacity) { 1372 variant[i] = '_'; 1373 } 1374 ++i; 1375 needSeparator = FALSE; 1376 } 1377 if(i<variantCapacity) { 1378 variant[i]=(char)uprv_toupper(*localeID); 1379 if(variant[i]=='-') { 1380 variant[i]='_'; 1381 } 1382 } 1383 i++; 1384 localeID++; 1385 } 1386 } 1387 1388 /* if there is no variant tag after a '-' or '_' then look for '@' */ 1389 if(i==0) { 1390 if(prev=='@') { 1391 /* keep localeID */ 1392 } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { 1393 ++localeID; /* point after the '@' */ 1394 } else { 1395 return 0; 1396 } 1397 while(!_isTerminator(*localeID)) { 1398 if (needSeparator) { 1399 if (i<variantCapacity) { 1400 variant[i] = '_'; 1401 } 1402 ++i; 1403 needSeparator = FALSE; 1404 } 1405 if(i<variantCapacity) { 1406 variant[i]=(char)uprv_toupper(*localeID); 1407 if(variant[i]=='-' || variant[i]==',') { 1408 variant[i]='_'; 1409 } 1410 } 1411 i++; 1412 localeID++; 1413 } 1414 } 1415 1416 return i; 1417} 1418 1419static int32_t 1420_getVariant(const char *localeID, 1421 char prev, 1422 char *variant, int32_t variantCapacity) { 1423 return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); 1424} 1425 1426/** 1427 * Delete ALL instances of a variant from the given list of one or 1428 * more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR". 1429 * @param variants the source string of one or more variants, 1430 * separated by '_'. This will be MODIFIED IN PLACE. Not zero 1431 * terminated; if it is, trailing zero will NOT be maintained. 1432 * @param variantsLen length of variants 1433 * @param toDelete variant to delete, without separators, e.g. "EURO" 1434 * or "PREEURO"; not zero terminated 1435 * @param toDeleteLen length of toDelete 1436 * @return number of characters deleted from variants 1437 */ 1438static int32_t 1439_deleteVariant(char* variants, int32_t variantsLen, 1440 const char* toDelete, int32_t toDeleteLen) 1441{ 1442 int32_t delta = 0; /* number of chars deleted */ 1443 for (;;) { 1444 UBool flag = FALSE; 1445 if (variantsLen < toDeleteLen) { 1446 return delta; 1447 } 1448 if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && 1449 (variantsLen == toDeleteLen || 1450 (flag=(variants[toDeleteLen] == '_')))) 1451 { 1452 int32_t d = toDeleteLen + (flag?1:0); 1453 variantsLen -= d; 1454 delta += d; 1455 if (variantsLen > 0) { 1456 uprv_memmove(variants, variants+d, variantsLen); 1457 } 1458 } else { 1459 char* p = _strnchr(variants, variantsLen, '_'); 1460 if (p == NULL) { 1461 return delta; 1462 } 1463 ++p; 1464 variantsLen -= (int32_t)(p - variants); 1465 variants = p; 1466 } 1467 } 1468} 1469 1470/* Keyword enumeration */ 1471 1472typedef struct UKeywordsContext { 1473 char* keywords; 1474 char* current; 1475} UKeywordsContext; 1476 1477U_CDECL_BEGIN 1478 1479static void U_CALLCONV 1480uloc_kw_closeKeywords(UEnumeration *enumerator) { 1481 uprv_free(((UKeywordsContext *)enumerator->context)->keywords); 1482 uprv_free(enumerator->context); 1483 uprv_free(enumerator); 1484} 1485 1486static int32_t U_CALLCONV 1487uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { 1488 char *kw = ((UKeywordsContext *)en->context)->keywords; 1489 int32_t result = 0; 1490 while(*kw) { 1491 result++; 1492 kw += uprv_strlen(kw)+1; 1493 } 1494 return result; 1495} 1496 1497static const char * U_CALLCONV 1498uloc_kw_nextKeyword(UEnumeration* en, 1499 int32_t* resultLength, 1500 UErrorCode* /*status*/) { 1501 const char* result = ((UKeywordsContext *)en->context)->current; 1502 int32_t len = 0; 1503 if(*result) { 1504 len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); 1505 ((UKeywordsContext *)en->context)->current += len+1; 1506 } else { 1507 result = NULL; 1508 } 1509 if (resultLength) { 1510 *resultLength = len; 1511 } 1512 return result; 1513} 1514 1515static void U_CALLCONV 1516uloc_kw_resetKeywords(UEnumeration* en, 1517 UErrorCode* /*status*/) { 1518 ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; 1519} 1520 1521U_CDECL_END 1522 1523 1524static const UEnumeration gKeywordsEnum = { 1525 NULL, 1526 NULL, 1527 uloc_kw_closeKeywords, 1528 uloc_kw_countKeywords, 1529 uenum_unextDefault, 1530 uloc_kw_nextKeyword, 1531 uloc_kw_resetKeywords 1532}; 1533 1534U_CAPI UEnumeration* U_EXPORT2 1535uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) 1536{ 1537 UKeywordsContext *myContext = NULL; 1538 UEnumeration *result = NULL; 1539 1540 if(U_FAILURE(*status)) { 1541 return NULL; 1542 } 1543 result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 1544 /* Null pointer test */ 1545 if (result == NULL) { 1546 *status = U_MEMORY_ALLOCATION_ERROR; 1547 return NULL; 1548 } 1549 uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration)); 1550 myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))); 1551 if (myContext == NULL) { 1552 *status = U_MEMORY_ALLOCATION_ERROR; 1553 uprv_free(result); 1554 return NULL; 1555 } 1556 myContext->keywords = (char *)uprv_malloc(keywordListSize+1); 1557 uprv_memcpy(myContext->keywords, keywordList, keywordListSize); 1558 myContext->keywords[keywordListSize] = 0; 1559 myContext->current = myContext->keywords; 1560 result->context = myContext; 1561 return result; 1562} 1563 1564U_CAPI UEnumeration* U_EXPORT2 1565uloc_openKeywords(const char* localeID, 1566 UErrorCode* status) 1567{ 1568 int32_t i=0; 1569 char keywords[256]; 1570 int32_t keywordsCapacity = 256; 1571 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1572 const char* tmpLocaleID; 1573 1574 if(status==NULL || U_FAILURE(*status)) { 1575 return 0; 1576 } 1577 1578 if (_hasBCP47Extension(localeID)) { 1579 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); 1580 } else { 1581 if (localeID==NULL) { 1582 localeID=uloc_getDefault(); 1583 } 1584 tmpLocaleID=localeID; 1585 } 1586 1587 /* Skip the language */ 1588 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 1589 if(_isIDSeparator(*tmpLocaleID)) { 1590 const char *scriptID; 1591 /* Skip the script if available */ 1592 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 1593 if(scriptID != tmpLocaleID+1) { 1594 /* Found optional script */ 1595 tmpLocaleID = scriptID; 1596 } 1597 /* Skip the Country */ 1598 if (_isIDSeparator(*tmpLocaleID)) { 1599 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); 1600 if(_isIDSeparator(*tmpLocaleID)) { 1601 _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); 1602 } 1603 } 1604 } 1605 1606 /* keywords are located after '@' */ 1607 if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { 1608 i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status); 1609 } 1610 1611 if(i) { 1612 return uloc_openKeywordList(keywords, i, status); 1613 } else { 1614 return NULL; 1615 } 1616} 1617 1618 1619/* bit-flags for 'options' parameter of _canonicalize */ 1620#define _ULOC_STRIP_KEYWORDS 0x2 1621#define _ULOC_CANONICALIZE 0x1 1622 1623#define OPTION_SET(options, mask) ((options & mask) != 0) 1624 1625static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; 1626#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default) 1627 1628/** 1629 * Canonicalize the given localeID, to level 1 or to level 2, 1630 * depending on the options. To specify level 1, pass in options=0. 1631 * To specify level 2, pass in options=_ULOC_CANONICALIZE. 1632 * 1633 * This is the code underlying uloc_getName and uloc_canonicalize. 1634 */ 1635static int32_t 1636_canonicalize(const char* localeID, 1637 char* result, 1638 int32_t resultCapacity, 1639 uint32_t options, 1640 UErrorCode* err) { 1641 int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity; 1642 char localeBuffer[ULOC_FULLNAME_CAPACITY]; 1643 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1644 const char* origLocaleID; 1645 const char* tmpLocaleID; 1646 const char* keywordAssign = NULL; 1647 const char* separatorIndicator = NULL; 1648 const char* addKeyword = NULL; 1649 const char* addValue = NULL; 1650 char* name; 1651 char* variant = NULL; /* pointer into name, or NULL */ 1652 1653 if (U_FAILURE(*err)) { 1654 return 0; 1655 } 1656 1657 if (_hasBCP47Extension(localeID)) { 1658 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 1659 } else { 1660 if (localeID==NULL) { 1661 localeID=uloc_getDefault(); 1662 } 1663 tmpLocaleID=localeID; 1664 } 1665 1666 origLocaleID=tmpLocaleID; 1667 1668 /* if we are doing a full canonicalization, then put results in 1669 localeBuffer, if necessary; otherwise send them to result. */ 1670 if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/ 1671 (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) { 1672 name = localeBuffer; 1673 nameCapacity = (int32_t)sizeof(localeBuffer); 1674 } else { 1675 name = result; 1676 nameCapacity = resultCapacity; 1677 } 1678 1679 /* get all pieces, one after another, and separate with '_' */ 1680 len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID); 1681 1682 if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) { 1683 const char *d = uloc_getDefault(); 1684 1685 len = (int32_t)uprv_strlen(d); 1686 1687 if (name != NULL) { 1688 uprv_strncpy(name, d, len); 1689 } 1690 } else if(_isIDSeparator(*tmpLocaleID)) { 1691 const char *scriptID; 1692 1693 ++fieldCount; 1694 if(len<nameCapacity) { 1695 name[len]='_'; 1696 } 1697 ++len; 1698 1699 scriptSize=ulocimp_getScript(tmpLocaleID+1, 1700 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID); 1701 if(scriptSize > 0) { 1702 /* Found optional script */ 1703 tmpLocaleID = scriptID; 1704 ++fieldCount; 1705 len+=scriptSize; 1706 if (_isIDSeparator(*tmpLocaleID)) { 1707 /* If there is something else, then we add the _ */ 1708 if(len<nameCapacity) { 1709 name[len]='_'; 1710 } 1711 ++len; 1712 } 1713 } 1714 1715 if (_isIDSeparator(*tmpLocaleID)) { 1716 const char *cntryID; 1717 int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, 1718 (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID); 1719 if (cntrySize > 0) { 1720 /* Found optional country */ 1721 tmpLocaleID = cntryID; 1722 len+=cntrySize; 1723 } 1724 if(_isIDSeparator(*tmpLocaleID)) { 1725 /* If there is something else, then we add the _ if we found country before. */ 1726 if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { 1727 ++fieldCount; 1728 if(len<nameCapacity) { 1729 name[len]='_'; 1730 } 1731 ++len; 1732 } 1733 1734 variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, 1735 (len<nameCapacity ? name+len : NULL), nameCapacity-len); 1736 if (variantSize > 0) { 1737 variant = len<nameCapacity ? name+len : NULL; 1738 len += variantSize; 1739 tmpLocaleID += variantSize + 1; /* skip '_' and variant */ 1740 } 1741 } 1742 } 1743 } 1744 1745 /* Copy POSIX-style charset specifier, if any [mr.utf8] */ 1746 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { 1747 UBool done = FALSE; 1748 do { 1749 char c = *tmpLocaleID; 1750 switch (c) { 1751 case 0: 1752 case '@': 1753 done = TRUE; 1754 break; 1755 default: 1756 if (len<nameCapacity) { 1757 name[len] = c; 1758 } 1759 ++len; 1760 ++tmpLocaleID; 1761 break; 1762 } 1763 } while (!done); 1764 } 1765 1766 /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' 1767 After this, tmpLocaleID either points to '@' or is NULL */ 1768 if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { 1769 keywordAssign = uprv_strchr(tmpLocaleID, '='); 1770 separatorIndicator = uprv_strchr(tmpLocaleID, ';'); 1771 } 1772 1773 /* Copy POSIX-style variant, if any [mr@FOO] */ 1774 if (!OPTION_SET(options, _ULOC_CANONICALIZE) && 1775 tmpLocaleID != NULL && keywordAssign == NULL) { 1776 for (;;) { 1777 char c = *tmpLocaleID; 1778 if (c == 0) { 1779 break; 1780 } 1781 if (len<nameCapacity) { 1782 name[len] = c; 1783 } 1784 ++len; 1785 ++tmpLocaleID; 1786 } 1787 } 1788 1789 if (OPTION_SET(options, _ULOC_CANONICALIZE)) { 1790 /* Handle @FOO variant if @ is present and not followed by = */ 1791 if (tmpLocaleID!=NULL && keywordAssign==NULL) { 1792 int32_t posixVariantSize; 1793 /* Add missing '_' if needed */ 1794 if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { 1795 do { 1796 if(len<nameCapacity) { 1797 name[len]='_'; 1798 } 1799 ++len; 1800 ++fieldCount; 1801 } while(fieldCount<2); 1802 } 1803 posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len, 1804 (UBool)(variantSize > 0)); 1805 if (posixVariantSize > 0) { 1806 if (variant == NULL) { 1807 variant = name+len; 1808 } 1809 len += posixVariantSize; 1810 variantSize += posixVariantSize; 1811 } 1812 } 1813 1814 /* Handle generic variants first */ 1815 if (variant) { 1816 for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) { 1817 const char* variantToCompare = VARIANT_MAP[j].variant; 1818 int32_t n = (int32_t)uprv_strlen(variantToCompare); 1819 int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n); 1820 len -= variantLen; 1821 if (variantLen > 0) { 1822 if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */ 1823 --len; 1824 } 1825 addKeyword = VARIANT_MAP[j].keyword; 1826 addValue = VARIANT_MAP[j].value; 1827 break; 1828 } 1829 } 1830 if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */ 1831 --len; 1832 } 1833 } 1834 1835 /* Look up the ID in the canonicalization map */ 1836 for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) { 1837 const char* id = CANONICALIZE_MAP[j].id; 1838 int32_t n = (int32_t)uprv_strlen(id); 1839 if (len == n && uprv_strncmp(name, id, n) == 0) { 1840 if (n == 0 && tmpLocaleID != NULL) { 1841 break; /* Don't remap "" if keywords present */ 1842 } 1843 len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID); 1844 if (CANONICALIZE_MAP[j].keyword) { 1845 addKeyword = CANONICALIZE_MAP[j].keyword; 1846 addValue = CANONICALIZE_MAP[j].value; 1847 } 1848 break; 1849 } 1850 } 1851 } 1852 1853 if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { 1854 if (tmpLocaleID!=NULL && keywordAssign!=NULL && 1855 (!separatorIndicator || separatorIndicator > keywordAssign)) { 1856 if(len<nameCapacity) { 1857 name[len]='@'; 1858 } 1859 ++len; 1860 ++fieldCount; 1861 len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len, 1862 NULL, 0, NULL, TRUE, addKeyword, addValue, err); 1863 } else if (addKeyword != NULL) { 1864 U_ASSERT(addValue != NULL && len < nameCapacity); 1865 /* inelegant but works -- later make _getKeywords do this? */ 1866 len += _copyCount(name+len, nameCapacity-len, "@"); 1867 len += _copyCount(name+len, nameCapacity-len, addKeyword); 1868 len += _copyCount(name+len, nameCapacity-len, "="); 1869 len += _copyCount(name+len, nameCapacity-len, addValue); 1870 } 1871 } 1872 1873 if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) { 1874 uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len); 1875 } 1876 1877 return u_terminateChars(result, resultCapacity, len, err); 1878} 1879 1880/* ### ID parsing API **************************************************/ 1881 1882U_CAPI int32_t U_EXPORT2 1883uloc_getParent(const char* localeID, 1884 char* parent, 1885 int32_t parentCapacity, 1886 UErrorCode* err) 1887{ 1888 const char *lastUnderscore; 1889 int32_t i; 1890 1891 if (U_FAILURE(*err)) 1892 return 0; 1893 1894 if (localeID == NULL) 1895 localeID = uloc_getDefault(); 1896 1897 lastUnderscore=uprv_strrchr(localeID, '_'); 1898 if(lastUnderscore!=NULL) { 1899 i=(int32_t)(lastUnderscore-localeID); 1900 } else { 1901 i=0; 1902 } 1903 1904 if(i>0 && parent != localeID) { 1905 uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); 1906 } 1907 return u_terminateChars(parent, parentCapacity, i, err); 1908} 1909 1910U_CAPI int32_t U_EXPORT2 1911uloc_getLanguage(const char* localeID, 1912 char* language, 1913 int32_t languageCapacity, 1914 UErrorCode* err) 1915{ 1916 /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/ 1917 int32_t i=0; 1918 1919 if (err==NULL || U_FAILURE(*err)) { 1920 return 0; 1921 } 1922 1923 if(localeID==NULL) { 1924 localeID=uloc_getDefault(); 1925 } 1926 1927 i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); 1928 return u_terminateChars(language, languageCapacity, i, err); 1929} 1930 1931U_CAPI int32_t U_EXPORT2 1932uloc_getScript(const char* localeID, 1933 char* script, 1934 int32_t scriptCapacity, 1935 UErrorCode* err) 1936{ 1937 int32_t i=0; 1938 1939 if(err==NULL || U_FAILURE(*err)) { 1940 return 0; 1941 } 1942 1943 if(localeID==NULL) { 1944 localeID=uloc_getDefault(); 1945 } 1946 1947 /* skip the language */ 1948 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1949 if(_isIDSeparator(*localeID)) { 1950 i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); 1951 } 1952 return u_terminateChars(script, scriptCapacity, i, err); 1953} 1954 1955U_CAPI int32_t U_EXPORT2 1956uloc_getCountry(const char* localeID, 1957 char* country, 1958 int32_t countryCapacity, 1959 UErrorCode* err) 1960{ 1961 int32_t i=0; 1962 1963 if(err==NULL || U_FAILURE(*err)) { 1964 return 0; 1965 } 1966 1967 if(localeID==NULL) { 1968 localeID=uloc_getDefault(); 1969 } 1970 1971 /* Skip the language */ 1972 ulocimp_getLanguage(localeID, NULL, 0, &localeID); 1973 if(_isIDSeparator(*localeID)) { 1974 const char *scriptID; 1975 /* Skip the script if available */ 1976 ulocimp_getScript(localeID+1, NULL, 0, &scriptID); 1977 if(scriptID != localeID+1) { 1978 /* Found optional script */ 1979 localeID = scriptID; 1980 } 1981 if(_isIDSeparator(*localeID)) { 1982 i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); 1983 } 1984 } 1985 return u_terminateChars(country, countryCapacity, i, err); 1986} 1987 1988U_CAPI int32_t U_EXPORT2 1989uloc_getVariant(const char* localeID, 1990 char* variant, 1991 int32_t variantCapacity, 1992 UErrorCode* err) 1993{ 1994 char tempBuffer[ULOC_FULLNAME_CAPACITY]; 1995 const char* tmpLocaleID; 1996 int32_t i=0; 1997 1998 if(err==NULL || U_FAILURE(*err)) { 1999 return 0; 2000 } 2001 2002 if (_hasBCP47Extension(localeID)) { 2003 _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); 2004 } else { 2005 if (localeID==NULL) { 2006 localeID=uloc_getDefault(); 2007 } 2008 tmpLocaleID=localeID; 2009 } 2010 2011 /* Skip the language */ 2012 ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); 2013 if(_isIDSeparator(*tmpLocaleID)) { 2014 const char *scriptID; 2015 /* Skip the script if available */ 2016 ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); 2017 if(scriptID != tmpLocaleID+1) { 2018 /* Found optional script */ 2019 tmpLocaleID = scriptID; 2020 } 2021 /* Skip the Country */ 2022 if (_isIDSeparator(*tmpLocaleID)) { 2023 const char *cntryID; 2024 ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); 2025 if (cntryID != tmpLocaleID+1) { 2026 /* Found optional country */ 2027 tmpLocaleID = cntryID; 2028 } 2029 if(_isIDSeparator(*tmpLocaleID)) { 2030 /* If there was no country ID, skip a possible extra IDSeparator */ 2031 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { 2032 tmpLocaleID++; 2033 } 2034 i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); 2035 } 2036 } 2037 } 2038 2039 /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */ 2040 /* if we do not have a variant tag yet then try a POSIX variant after '@' */ 2041/* 2042 if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { 2043 i=_getVariant(localeID+1, '@', variant, variantCapacity); 2044 } 2045*/ 2046 return u_terminateChars(variant, variantCapacity, i, err); 2047} 2048 2049U_CAPI int32_t U_EXPORT2 2050uloc_getName(const char* localeID, 2051 char* name, 2052 int32_t nameCapacity, 2053 UErrorCode* err) 2054{ 2055 return _canonicalize(localeID, name, nameCapacity, 0, err); 2056} 2057 2058U_CAPI int32_t U_EXPORT2 2059uloc_getBaseName(const char* localeID, 2060 char* name, 2061 int32_t nameCapacity, 2062 UErrorCode* err) 2063{ 2064 return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); 2065} 2066 2067U_CAPI int32_t U_EXPORT2 2068uloc_canonicalize(const char* localeID, 2069 char* name, 2070 int32_t nameCapacity, 2071 UErrorCode* err) 2072{ 2073 return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); 2074} 2075 2076U_CAPI const char* U_EXPORT2 2077uloc_getISO3Language(const char* localeID) 2078{ 2079 int16_t offset; 2080 char lang[ULOC_LANG_CAPACITY]; 2081 UErrorCode err = U_ZERO_ERROR; 2082 2083 if (localeID == NULL) 2084 { 2085 localeID = uloc_getDefault(); 2086 } 2087 uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); 2088 if (U_FAILURE(err)) 2089 return ""; 2090 offset = _findIndex(LANGUAGES, lang); 2091 if (offset < 0) 2092 return ""; 2093 return LANGUAGES_3[offset]; 2094} 2095 2096U_CAPI const char* U_EXPORT2 2097uloc_getISO3Country(const char* localeID) 2098{ 2099 int16_t offset; 2100 char cntry[ULOC_LANG_CAPACITY]; 2101 UErrorCode err = U_ZERO_ERROR; 2102 2103 if (localeID == NULL) 2104 { 2105 localeID = uloc_getDefault(); 2106 } 2107 uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); 2108 if (U_FAILURE(err)) 2109 return ""; 2110 offset = _findIndex(COUNTRIES, cntry); 2111 if (offset < 0) 2112 return ""; 2113 2114 return COUNTRIES_3[offset]; 2115} 2116 2117U_CAPI uint32_t U_EXPORT2 2118uloc_getLCID(const char* localeID) 2119{ 2120 UErrorCode status = U_ZERO_ERROR; 2121 char langID[ULOC_FULLNAME_CAPACITY]; 2122 2123 uloc_getLanguage(localeID, langID, sizeof(langID), &status); 2124 if (U_FAILURE(status)) { 2125 return 0; 2126 } 2127 2128 if (uprv_strchr(localeID, '@')) { 2129 // uprv_convertToLCID does not support keywords other than collation. 2130 // Remove all keywords except collation. 2131 int32_t len; 2132 char collVal[ULOC_KEYWORDS_CAPACITY]; 2133 char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; 2134 2135 len = uloc_getKeywordValue(localeID, "collation", collVal, 2136 UPRV_LENGTHOF(collVal) - 1, &status); 2137 2138 if (U_SUCCESS(status) && len > 0) { 2139 collVal[len] = 0; 2140 2141 len = uloc_getBaseName(localeID, tmpLocaleID, 2142 UPRV_LENGTHOF(tmpLocaleID) - 1, &status); 2143 2144 if (U_SUCCESS(status) && len > 0) { 2145 tmpLocaleID[len] = 0; 2146 2147 len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, 2148 UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status); 2149 2150 if (U_SUCCESS(status) && len > 0) { 2151 tmpLocaleID[len] = 0; 2152 return uprv_convertToLCID(langID, tmpLocaleID, &status); 2153 } 2154 } 2155 } 2156 2157 // fall through - all keywords are simply ignored 2158 status = U_ZERO_ERROR; 2159 } 2160 2161 return uprv_convertToLCID(langID, localeID, &status); 2162} 2163 2164U_CAPI int32_t U_EXPORT2 2165uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, 2166 UErrorCode *status) 2167{ 2168 return uprv_convertToPosix(hostid, locale, localeCapacity, status); 2169} 2170 2171/* ### Default locale **************************************************/ 2172 2173U_CAPI const char* U_EXPORT2 2174uloc_getDefault() 2175{ 2176 return locale_get_default(); 2177} 2178 2179U_CAPI void U_EXPORT2 2180uloc_setDefault(const char* newDefaultLocale, 2181 UErrorCode* err) 2182{ 2183 if (U_FAILURE(*err)) 2184 return; 2185 /* the error code isn't currently used for anything by this function*/ 2186 2187 /* propagate change to C++ */ 2188 locale_set_default(newDefaultLocale); 2189} 2190 2191/** 2192 * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer 2193 * to an array of pointers to arrays of char. All of these pointers are owned 2194 * by ICU-- do not delete them, and do not write through them. The array is 2195 * terminated with a null pointer. 2196 */ 2197U_CAPI const char* const* U_EXPORT2 2198uloc_getISOLanguages() 2199{ 2200 return LANGUAGES; 2201} 2202 2203/** 2204 * Returns a list of all 2-letter country codes defined in ISO 639. This is a 2205 * pointer to an array of pointers to arrays of char. All of these pointers are 2206 * owned by ICU-- do not delete them, and do not write through them. The array is 2207 * terminated with a null pointer. 2208 */ 2209U_CAPI const char* const* U_EXPORT2 2210uloc_getISOCountries() 2211{ 2212 return COUNTRIES; 2213} 2214 2215 2216/* this function to be moved into cstring.c later */ 2217static char gDecimal = 0; 2218 2219static /* U_CAPI */ 2220double 2221/* U_EXPORT2 */ 2222_uloc_strtod(const char *start, char **end) { 2223 char *decimal; 2224 char *myEnd; 2225 char buf[30]; 2226 double rv; 2227 if (!gDecimal) { 2228 char rep[5]; 2229 /* For machines that decide to change the decimal on you, 2230 and try to be too smart with localization. 2231 This normally should be just a '.'. */ 2232 sprintf(rep, "%+1.1f", 1.0); 2233 gDecimal = rep[2]; 2234 } 2235 2236 if(gDecimal == '.') { 2237 return uprv_strtod(start, end); /* fall through to OS */ 2238 } else { 2239 uprv_strncpy(buf, start, 29); 2240 buf[29]=0; 2241 decimal = uprv_strchr(buf, '.'); 2242 if(decimal) { 2243 *decimal = gDecimal; 2244 } else { 2245 return uprv_strtod(start, end); /* no decimal point */ 2246 } 2247 rv = uprv_strtod(buf, &myEnd); 2248 if(end) { 2249 *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */ 2250 } 2251 return rv; 2252 } 2253} 2254 2255typedef struct { 2256 float q; 2257 int32_t dummy; /* to avoid uninitialized memory copy from qsort */ 2258 char locale[ULOC_FULLNAME_CAPACITY+1]; 2259} _acceptLangItem; 2260 2261static int32_t U_CALLCONV 2262uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b) 2263{ 2264 const _acceptLangItem *aa = (const _acceptLangItem*)a; 2265 const _acceptLangItem *bb = (const _acceptLangItem*)b; 2266 2267 int32_t rc = 0; 2268 if(bb->q < aa->q) { 2269 rc = -1; /* A > B */ 2270 } else if(bb->q > aa->q) { 2271 rc = 1; /* A < B */ 2272 } else { 2273 rc = 0; /* A = B */ 2274 } 2275 2276 if(rc==0) { 2277 rc = uprv_stricmp(aa->locale, bb->locale); 2278 } 2279 2280#if defined(ULOC_DEBUG) 2281 /* fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n", 2282 aa->locale, aa->q, 2283 bb->locale, bb->q, 2284 rc);*/ 2285#endif 2286 2287 return rc; 2288} 2289 2290/* 2291mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53 2292*/ 2293 2294U_CAPI int32_t U_EXPORT2 2295uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult, 2296 const char *httpAcceptLanguage, 2297 UEnumeration* availableLocales, 2298 UErrorCode *status) 2299{ 2300 MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items. 2301 char tmp[ULOC_FULLNAME_CAPACITY +1]; 2302 int32_t n = 0; 2303 const char *itemEnd; 2304 const char *paramEnd; 2305 const char *s; 2306 const char *t; 2307 int32_t res; 2308 int32_t i; 2309 int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage); 2310 2311 if(U_FAILURE(*status)) { 2312 return -1; 2313 } 2314 2315 for(s=httpAcceptLanguage;s&&*s;) { 2316 while(isspace(*s)) /* eat space at the beginning */ 2317 s++; 2318 itemEnd=uprv_strchr(s,','); 2319 paramEnd=uprv_strchr(s,';'); 2320 if(!itemEnd) { 2321 itemEnd = httpAcceptLanguage+l; /* end of string */ 2322 } 2323 if(paramEnd && paramEnd<itemEnd) { 2324 /* semicolon (;) is closer than end (,) */ 2325 t = paramEnd+1; 2326 if(*t=='q') { 2327 t++; 2328 } 2329 while(isspace(*t)) { 2330 t++; 2331 } 2332 if(*t=='=') { 2333 t++; 2334 } 2335 while(isspace(*t)) { 2336 t++; 2337 } 2338 items[n].q = (float)_uloc_strtod(t,NULL); 2339 } else { 2340 /* no semicolon - it's 1.0 */ 2341 items[n].q = 1.0f; 2342 paramEnd = itemEnd; 2343 } 2344 items[n].dummy=0; 2345 /* eat spaces prior to semi */ 2346 for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) 2347 ; 2348 int32_t slen = ((t+1)-s); 2349 if(slen > ULOC_FULLNAME_CAPACITY) { 2350 *status = U_BUFFER_OVERFLOW_ERROR; 2351 return -1; // too big 2352 } 2353 uprv_strncpy(items[n].locale, s, slen); 2354 items[n].locale[slen]=0; // terminate 2355 int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status); 2356 if(U_FAILURE(*status)) return -1; 2357 if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) { 2358 // canonicalization had an effect- copy back 2359 uprv_strncpy(items[n].locale, tmp, clen); 2360 items[n].locale[clen] = 0; // terminate 2361 } 2362#if defined(ULOC_DEBUG) 2363 /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/ 2364#endif 2365 n++; 2366 s = itemEnd; 2367 while(*s==',') { /* eat duplicate commas */ 2368 s++; 2369 } 2370 if(n>=items.getCapacity()) { // If we need more items 2371 if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) { 2372 *status = U_MEMORY_ALLOCATION_ERROR; 2373 return -1; 2374 } 2375#if defined(ULOC_DEBUG) 2376 fprintf(stderr,"malloced at size %d\n", items.getCapacity()); 2377#endif 2378 } 2379 } 2380 uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status); 2381 if (U_FAILURE(*status)) { 2382 return -1; 2383 } 2384 LocalMemory<const char*> strs(NULL); 2385 if (strs.allocateInsteadAndReset(n) == NULL) { 2386 *status = U_MEMORY_ALLOCATION_ERROR; 2387 return -1; 2388 } 2389 for(i=0;i<n;i++) { 2390#if defined(ULOC_DEBUG) 2391 /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/ 2392#endif 2393 strs[i]=items[i].locale; 2394 } 2395 res = uloc_acceptLanguage(result, resultAvailable, outResult, 2396 strs.getAlias(), n, availableLocales, status); 2397 return res; 2398} 2399 2400 2401U_CAPI int32_t U_EXPORT2 2402uloc_acceptLanguage(char *result, int32_t resultAvailable, 2403 UAcceptResult *outResult, const char **acceptList, 2404 int32_t acceptListCount, 2405 UEnumeration* availableLocales, 2406 UErrorCode *status) 2407{ 2408 int32_t i,j; 2409 int32_t len; 2410 int32_t maxLen=0; 2411 char tmp[ULOC_FULLNAME_CAPACITY+1]; 2412 const char *l; 2413 char **fallbackList; 2414 if(U_FAILURE(*status)) { 2415 return -1; 2416 } 2417 fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount))); 2418 if(fallbackList==NULL) { 2419 *status = U_MEMORY_ALLOCATION_ERROR; 2420 return -1; 2421 } 2422 for(i=0;i<acceptListCount;i++) { 2423#if defined(ULOC_DEBUG) 2424 fprintf(stderr,"%02d: %s\n", i, acceptList[i]); 2425#endif 2426 while((l=uenum_next(availableLocales, NULL, status))) { 2427#if defined(ULOC_DEBUG) 2428 fprintf(stderr," %s\n", l); 2429#endif 2430 len = (int32_t)uprv_strlen(l); 2431 if(!uprv_strcmp(acceptList[i], l)) { 2432 if(outResult) { 2433 *outResult = ULOC_ACCEPT_VALID; 2434 } 2435#if defined(ULOC_DEBUG) 2436 fprintf(stderr, "MATCH! %s\n", l); 2437#endif 2438 if(len>0) { 2439 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2440 } 2441 for(j=0;j<i;j++) { 2442 uprv_free(fallbackList[j]); 2443 } 2444 uprv_free(fallbackList); 2445 return u_terminateChars(result, resultAvailable, len, status); 2446 } 2447 if(len>maxLen) { 2448 maxLen = len; 2449 } 2450 } 2451 uenum_reset(availableLocales, status); 2452 /* save off parent info */ 2453 if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) { 2454 fallbackList[i] = uprv_strdup(tmp); 2455 } else { 2456 fallbackList[i]=0; 2457 } 2458 } 2459 2460 for(maxLen--;maxLen>0;maxLen--) { 2461 for(i=0;i<acceptListCount;i++) { 2462 if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) { 2463#if defined(ULOC_DEBUG) 2464 fprintf(stderr,"Try: [%s]", fallbackList[i]); 2465#endif 2466 while((l=uenum_next(availableLocales, NULL, status))) { 2467#if defined(ULOC_DEBUG) 2468 fprintf(stderr," %s\n", l); 2469#endif 2470 len = (int32_t)uprv_strlen(l); 2471 if(!uprv_strcmp(fallbackList[i], l)) { 2472 if(outResult) { 2473 *outResult = ULOC_ACCEPT_FALLBACK; 2474 } 2475#if defined(ULOC_DEBUG) 2476 fprintf(stderr, "fallback MATCH! %s\n", l); 2477#endif 2478 if(len>0) { 2479 uprv_strncpy(result, l, uprv_min(len, resultAvailable)); 2480 } 2481 for(j=0;j<acceptListCount;j++) { 2482 uprv_free(fallbackList[j]); 2483 } 2484 uprv_free(fallbackList); 2485 return u_terminateChars(result, resultAvailable, len, status); 2486 } 2487 } 2488 uenum_reset(availableLocales, status); 2489 2490 if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) { 2491 uprv_free(fallbackList[i]); 2492 fallbackList[i] = uprv_strdup(tmp); 2493 } else { 2494 uprv_free(fallbackList[i]); 2495 fallbackList[i]=0; 2496 } 2497 } 2498 } 2499 if(outResult) { 2500 *outResult = ULOC_ACCEPT_FAILED; 2501 } 2502 } 2503 for(i=0;i<acceptListCount;i++) { 2504 uprv_free(fallbackList[i]); 2505 } 2506 uprv_free(fallbackList); 2507 return -1; 2508} 2509 2510U_CAPI const char* U_EXPORT2 2511uloc_toUnicodeLocaleKey(const char* keyword) 2512{ 2513 const char* bcpKey = ulocimp_toBcpKey(keyword); 2514 if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) { 2515 // unknown keyword, but syntax is fine.. 2516 return keyword; 2517 } 2518 return bcpKey; 2519} 2520 2521U_CAPI const char* U_EXPORT2 2522uloc_toUnicodeLocaleType(const char* keyword, const char* value) 2523{ 2524 const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL); 2525 if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) { 2526 // unknown keyword, but syntax is fine.. 2527 return value; 2528 } 2529 return bcpType; 2530} 2531 2532#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) 2533#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) 2534 2535static UBool 2536isWellFormedLegacyKey(const char* legacyKey) 2537{ 2538 const char* p = legacyKey; 2539 while (*p) { 2540 if (!UPRV_ISALPHANUM(*p)) { 2541 return FALSE; 2542 } 2543 p++; 2544 } 2545 return TRUE; 2546} 2547 2548static UBool 2549isWellFormedLegacyType(const char* legacyType) 2550{ 2551 const char* p = legacyType; 2552 int32_t alphaNumLen = 0; 2553 while (*p) { 2554 if (*p == '_' || *p == '/' || *p == '-') { 2555 if (alphaNumLen == 0) { 2556 return FALSE; 2557 } 2558 alphaNumLen = 0; 2559 } else if (UPRV_ISALPHANUM(*p)) { 2560 alphaNumLen++; 2561 } else { 2562 return FALSE; 2563 } 2564 p++; 2565 } 2566 return (alphaNumLen != 0); 2567} 2568 2569U_CAPI const char* U_EXPORT2 2570uloc_toLegacyKey(const char* keyword) 2571{ 2572 const char* legacyKey = ulocimp_toLegacyKey(keyword); 2573 if (legacyKey == NULL) { 2574 // Checks if the specified locale key is well-formed with the legacy locale syntax. 2575 // 2576 // Note: 2577 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. 2578 // However, a key should not contain '=' obviously. For now, all existing 2579 // keys are using ASCII alphabetic letters only. We won't add any new key 2580 // that is not compatible with the BCP 47 syntax. Therefore, we assume 2581 // a valid key consist from [0-9a-zA-Z], no symbols. 2582 if (isWellFormedLegacyKey(keyword)) { 2583 return keyword; 2584 } 2585 } 2586 return legacyKey; 2587} 2588 2589U_CAPI const char* U_EXPORT2 2590uloc_toLegacyType(const char* keyword, const char* value) 2591{ 2592 const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL); 2593 if (legacyType == NULL) { 2594 // Checks if the specified locale type is well-formed with the legacy locale syntax. 2595 // 2596 // Note: 2597 // Neither ICU nor LDML/CLDR provides the definition of keyword syntax. 2598 // However, a type should not contain '=' obviously. For now, all existing 2599 // types are using ASCII alphabetic letters with a few symbol letters. We won't 2600 // add any new type that is not compatible with the BCP 47 syntax except timezone 2601 // IDs. For now, we assume a valid type start with [0-9a-zA-Z], but may contain 2602 // '-' '_' '/' in the middle. 2603 if (isWellFormedLegacyType(value)) { 2604 return value; 2605 } 2606 } 2607 return legacyType; 2608} 2609 2610/*eof*/ 2611