1/* 2****************************************************************************** 3* 4* Copyright (C) 1997-2011, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* 9* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10* 11* Date Name Description 12* 04/14/97 aliu Creation. 13* 04/24/97 aliu Added getDefaultDataDirectory() and 14* getDefaultLocaleID(). 15* 04/28/97 aliu Rewritten to assume Unix and apply general methods 16* for assumed case. Non-UNIX platforms must be 17* special-cased. Rewrote numeric methods dealing 18* with NaN and Infinity to be platform independent 19* over all IEEE 754 platforms. 20* 05/13/97 aliu Restored sign of timezone 21* (semantics are hours West of GMT) 22* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23* nextDouble.. 24* 07/22/98 stephen Added remainder, max, min, trunc 25* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26* 08/24/98 stephen Added longBitsFromDouble 27* 09/08/98 stephen Minor changes for Mac Port 28* 03/02/99 stephen Removed openFile(). Added AS400 support. 29* Fixed EBCDIC tables 30* 04/15/99 stephen Converted to C. 31* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32* 08/04/99 jeffrey R. Added OS/2 changes 33* 11/15/99 helena Integrated S/390 IEEE support. 34* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36* 01/03/08 Steven L. Fake Time Support 37****************************************************************************** 38*/ 39 40/* Define _XOPEN_SOURCE for Solaris and friends. 
*/ 41/* NetBSD needs it to be >= 4 */ 42#if !defined(_XOPEN_SOURCE) 43#if __STDC_VERSION__ >= 199901L 44/* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */ 45#define _XOPEN_SOURCE 600 46#else 47#define _XOPEN_SOURCE 4 48#endif 49#endif 50 51/* Make sure things like readlink and such functions work. 52Poorly upgraded Solaris machines can't have this defined. 53Cleanly installed Solaris can use this #define. 54*/ 55#if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__)) 56#define _XOPEN_SOURCE_EXTENDED 1 57#endif 58 59/* include ICU headers */ 60#include "unicode/utypes.h" 61#include "unicode/putil.h" 62#include "unicode/ustring.h" 63#include "putilimp.h" 64#include "uassert.h" 65#include "umutex.h" 66#include "cmemory.h" 67#include "cstring.h" 68#include "locmap.h" 69#include "ucln_cmn.h" 70 71/* Include standard headers. */ 72#include <stdio.h> 73#include <stdlib.h> 74#include <string.h> 75#include <math.h> 76#include <locale.h> 77#include <float.h> 78 79/* include system headers */ 80#if defined(U_WINDOWS) || defined(U_MINGW) 81# define WIN32_LEAN_AND_MEAN 82# define VC_EXTRALEAN 83# define NOUSER 84# define NOSERVICE 85# define NOIME 86# define NOMCX 87# include <windows.h> 88# include "wintz.h" 89#elif defined(OS400) 90# include <float.h> 91# include <qusec.h> /* error code structure */ 92# include <qusrjobi.h> 93# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 94# include <mih/testptr.h> /* For uprv_maximumPtr */ 95#elif defined(XP_MAC) 96# include <Files.h> 97# include <IntlResources.h> 98# include <Script.h> 99# include <Folders.h> 100# include <MacTypes.h> 101# include <TextUtils.h> 102# define ICU_NO_USER_DATA_OVERRIDE 1 103#elif defined(OS390) 104# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 105#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD) 106# include <limits.h> 
107# include <unistd.h> 108#elif defined(U_QNX) 109# include <sys/neutrino.h> 110#elif defined(U_SOLARIS) 111# ifndef _XPG4_2 112# define _XPG4_2 113# endif 114#endif 115 116#if (defined(U_CYGWIN) || defined(U_MINGW)) && defined(__STRICT_ANSI__) 117/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 118#undef __STRICT_ANSI__ 119#endif 120 121/* 122 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 123 */ 124#include <time.h> 125 126#if defined(U_DARWIN) 127#include <TargetConditionals.h> 128#endif 129 130#ifndef U_WINDOWS 131#include <sys/time.h> 132#endif 133 134/* 135 * Only include langinfo.h if we have a way to get the codeset. If we later 136 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 137 * 138 */ 139 140#if U_HAVE_NL_LANGINFO_CODESET 141#include <langinfo.h> 142#endif 143 144/** 145 * Simple things (presence of functions, etc) should just go in configure.in and be added to 146 * icucfg.h via autoheader. 147 */ 148#if defined(U_HAVE_ICUCFG) 149#include "icucfg.h" 150#endif 151 152/* Define the extension for data files, again... */ 153#define DATA_TYPE "dat" 154 155/* Leave this copyright notice here! */ 156static const char copyright[] = U_COPYRIGHT_STRING; 157 158/* floating point implementations ------------------------------------------- */ 159 160/* We return QNAN rather than SNAN*/ 161#define SIGN 0x80000000U 162 163/* Make it easy to define certain types of constants */ 164typedef union { 165 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. 
*/ 166 double d64; 167} BitPatternConversion; 168static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 169static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 170 171/*--------------------------------------------------------------------------- 172 Platform utilities 173 Our general strategy is to assume we're on a POSIX platform. Platforms which 174 are non-POSIX must declare themselves so. The default POSIX implementation 175 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 176 functions). 177 ---------------------------------------------------------------------------*/ 178 179#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) || defined(U_MINGW) 180# undef U_POSIX_LOCALE 181#else 182# define U_POSIX_LOCALE 1 183#endif 184 185/* 186 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 187 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 188*/ 189#if !IEEE_754 190static char* 191u_topNBytesOfDouble(double* d, int n) 192{ 193#if U_IS_BIG_ENDIAN 194 return (char*)d; 195#else 196 return (char*)(d + 1) - n; 197#endif 198} 199 200static char* 201u_bottomNBytesOfDouble(double* d, int n) 202{ 203#if U_IS_BIG_ENDIAN 204 return (char*)(d + 1) - n; 205#else 206 return (char*)d; 207#endif 208} 209#endif /* !IEEE_754 */ 210 211#if IEEE_754 212static UBool 213u_signBit(double d) { 214 uint8_t hiByte; 215#if U_IS_BIG_ENDIAN 216 hiByte = *(uint8_t *)&d; 217#else 218 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 219#endif 220 return (hiByte & 0x80) != 0; 221} 222#endif 223 224 225 226#if defined (U_DEBUG_FAKETIME) 227/* Override the clock to test things without having to move the system clock. 
228 * Assumes POSIX gettimeofday() will function 229 */ 230UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 231UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 232UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 233static UMTX fakeClockMutex = NULL; 234 235static UDate getUTCtime_real() { 236 struct timeval posixTime; 237 gettimeofday(&posixTime, NULL); 238 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 239} 240 241static UDate getUTCtime_fake() { 242 umtx_lock(&fakeClockMutex); 243 if(!fakeClock_set) { 244 UDate real = getUTCtime_real(); 245 const char *fake_start = getenv("U_FAKETIME_START"); 246 if((fake_start!=NULL) && (fake_start[0]!=0)) { 247 sscanf(fake_start,"%lf",&fakeClock_t0); 248 fakeClock_dt = fakeClock_t0 - real; 249 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 250 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 251 fakeClock_t0, fake_start, fakeClock_dt, real); 252 } else { 253 fakeClock_dt = 0; 254 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 255 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 256 } 257 fakeClock_set = TRUE; 258 } 259 umtx_unlock(&fakeClockMutex); 260 261 return getUTCtime_real() + fakeClock_dt; 262} 263#endif 264 265#if defined(U_WINDOWS) 266typedef union { 267 int64_t int64; 268 FILETIME fileTime; 269} FileTimeConversion; /* This is like a ULARGE_INTEGER */ 270 271/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 272#define EPOCH_BIAS INT64_C(116444736000000000) 273#define HECTONANOSECOND_PER_MILLISECOND 10000 274 275#endif 276 277/*--------------------------------------------------------------------------- 278 Universal Implementations 279 These are designed to work on all platforms. 
Try these, and if they 280 don't work on your platform, then special case your platform with new 281 implementations. 282---------------------------------------------------------------------------*/ 283 284U_CAPI UDate U_EXPORT2 285uprv_getUTCtime() 286{ 287#if defined(U_DEBUG_FAKETIME) 288 return getUTCtime_fake(); /* Hook for overriding the clock */ 289#else 290 return uprv_getRawUTCtime(); 291#endif 292} 293 294/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 295U_CAPI UDate U_EXPORT2 296uprv_getRawUTCtime() 297{ 298#if defined(XP_MAC) 299 time_t t, t1, t2; 300 struct tm tmrec; 301 302 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 303 tmrec.tm_year = 70; 304 tmrec.tm_mon = 0; 305 tmrec.tm_mday = 1; 306 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 307 308 time(&t); 309 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 310 t2 = mktime(&tmrec); /* seconds of current GMT*/ 311 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 312#elif defined(U_WINDOWS) 313 314 FileTimeConversion winTime; 315 GetSystemTimeAsFileTime(&winTime.fileTime); 316 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 317#else 318 319#if defined(HAVE_GETTIMEOFDAY) 320 struct timeval posixTime; 321 gettimeofday(&posixTime, NULL); 322 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 323#else 324 time_t epochtime; 325 time(&epochtime); 326 return (UDate)epochtime * U_MILLIS_PER_SECOND; 327#endif 328 329#endif 330} 331 332/*----------------------------------------------------------------------------- 333 IEEE 754 334 These methods detect and return NaN and infinity values for doubles 335 conforming to IEEE 754. Platforms which support this standard include X86, 336 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 337 If this doesn't work on your platform, you have non-IEEE floating-point, and 338 will need to code your own versions. 
A naive implementation is to return 0.0 339 for getNaN and getInfinity, and false for isNaN and isInfinite. 340 ---------------------------------------------------------------------------*/ 341 342U_CAPI UBool U_EXPORT2 343uprv_isNaN(double number) 344{ 345#if IEEE_754 346 BitPatternConversion convertedNumber; 347 convertedNumber.d64 = number; 348 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 349 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 350 351#elif defined(OS390) 352 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 353 sizeof(uint32_t)); 354 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 355 sizeof(uint32_t)); 356 357 return ((highBits & 0x7F080000L) == 0x7F080000L) && 358 (lowBits == 0x00000000L); 359 360#else 361 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 362 /* you'll need to replace this default implementation with what's correct*/ 363 /* for your platform.*/ 364 return number != number; 365#endif 366} 367 368U_CAPI UBool U_EXPORT2 369uprv_isInfinite(double number) 370{ 371#if IEEE_754 372 BitPatternConversion convertedNumber; 373 convertedNumber.d64 = number; 374 /* Infinity is exactly 0x7FF0000000000000U. 
*/ 375 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 376#elif defined(OS390) 377 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 378 sizeof(uint32_t)); 379 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 380 sizeof(uint32_t)); 381 382 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 383 384#else 385 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 386 /* value, you'll need to replace this default implementation with what's*/ 387 /* correct for your platform.*/ 388 return number == (2.0 * number); 389#endif 390} 391 392U_CAPI UBool U_EXPORT2 393uprv_isPositiveInfinity(double number) 394{ 395#if IEEE_754 || defined(OS390) 396 return (UBool)(number > 0 && uprv_isInfinite(number)); 397#else 398 return uprv_isInfinite(number); 399#endif 400} 401 402U_CAPI UBool U_EXPORT2 403uprv_isNegativeInfinity(double number) 404{ 405#if IEEE_754 || defined(OS390) 406 return (UBool)(number < 0 && uprv_isInfinite(number)); 407 408#else 409 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 410 sizeof(uint32_t)); 411 return((highBits & SIGN) && uprv_isInfinite(number)); 412 413#endif 414} 415 416U_CAPI double U_EXPORT2 417uprv_getNaN() 418{ 419#if IEEE_754 || defined(OS390) 420 return gNan.d64; 421#else 422 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 423 /* you'll need to replace this default implementation with what's correct*/ 424 /* for your platform.*/ 425 return 0.0; 426#endif 427} 428 429U_CAPI double U_EXPORT2 430uprv_getInfinity() 431{ 432#if IEEE_754 || defined(OS390) 433 return gInf.d64; 434#else 435 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 436 /* value, you'll need to replace this default implementation with what's*/ 437 /* correct for your platform.*/ 438 return 0.0; 439#endif 440} 441 442U_CAPI double U_EXPORT2 443uprv_floor(double x) 444{ 445 return floor(x); 446} 447 448U_CAPI double U_EXPORT2 
449uprv_ceil(double x) 450{ 451 return ceil(x); 452} 453 454U_CAPI double U_EXPORT2 455uprv_round(double x) 456{ 457 return uprv_floor(x + 0.5); 458} 459 460U_CAPI double U_EXPORT2 461uprv_fabs(double x) 462{ 463 return fabs(x); 464} 465 466U_CAPI double U_EXPORT2 467uprv_modf(double x, double* y) 468{ 469 return modf(x, y); 470} 471 472U_CAPI double U_EXPORT2 473uprv_fmod(double x, double y) 474{ 475 return fmod(x, y); 476} 477 478U_CAPI double U_EXPORT2 479uprv_pow(double x, double y) 480{ 481 /* This is declared as "double pow(double x, double y)" */ 482 return pow(x, y); 483} 484 485U_CAPI double U_EXPORT2 486uprv_pow10(int32_t x) 487{ 488 return pow(10.0, (double)x); 489} 490 491U_CAPI double U_EXPORT2 492uprv_fmax(double x, double y) 493{ 494#if IEEE_754 495 /* first handle NaN*/ 496 if(uprv_isNaN(x) || uprv_isNaN(y)) 497 return uprv_getNaN(); 498 499 /* check for -0 and 0*/ 500 if(x == 0.0 && y == 0.0 && u_signBit(x)) 501 return y; 502 503#endif 504 505 /* this should work for all flt point w/o NaN and Inf special cases */ 506 return (x > y ? x : y); 507} 508 509U_CAPI double U_EXPORT2 510uprv_fmin(double x, double y) 511{ 512#if IEEE_754 513 /* first handle NaN*/ 514 if(uprv_isNaN(x) || uprv_isNaN(y)) 515 return uprv_getNaN(); 516 517 /* check for -0 and 0*/ 518 if(x == 0.0 && y == 0.0 && u_signBit(y)) 519 return y; 520 521#endif 522 523 /* this should work for all flt point w/o NaN and Inf special cases */ 524 return (x > y ? y : x); 525} 526 527/** 528 * Truncates the given double. 529 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 530 * This is different than calling floor() or ceil(): 531 * floor(3.3) = 3, floor(-3.3) = -4 532 * ceil(3.3) = 4, ceil(-3.3) = -3 533 */ 534U_CAPI double U_EXPORT2 535uprv_trunc(double d) 536{ 537#if IEEE_754 538 /* handle error cases*/ 539 if(uprv_isNaN(d)) 540 return uprv_getNaN(); 541 if(uprv_isInfinite(d)) 542 return uprv_getInfinity(); 543 544 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. 
*/ 545 return ceil(d); 546 else 547 return floor(d); 548 549#else 550 return d >= 0 ? floor(d) : ceil(d); 551 552#endif 553} 554 555/** 556 * Return the largest positive number that can be represented by an integer 557 * type of arbitrary bit length. 558 */ 559U_CAPI double U_EXPORT2 560uprv_maxMantissa(void) 561{ 562 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 563} 564 565U_CAPI double U_EXPORT2 566uprv_log(double d) 567{ 568 return log(d); 569} 570 571U_CAPI void * U_EXPORT2 572uprv_maximumPtr(void * base) 573{ 574#if defined(OS400) 575 /* 576 * With the provided function we should never be out of range of a given segment 577 * (a traditional/typical segment that is). Our segments have 5 bytes for the 578 * id and 3 bytes for the offset. The key is that the casting takes care of 579 * only retrieving the offset portion minus x1000. Hence, the smallest offset 580 * seen in a program is x001000 and when casted to an int would be 0. 581 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 582 * 583 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 584 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 585 * This function determines the activation based on the pointer that is passed in and 586 * calculates the appropriate maximum available size for 587 * each pointer type (TERASPACE and non-TERASPACE) 588 * 589 * Unlike other operating systems, the pointer model isn't determined at 590 * compile time on i5/OS. 
591 */ 592 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 593 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 594 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 595 } 596 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 597 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 598 599#else 600 return U_MAX_PTR(base); 601#endif 602} 603 604/*--------------------------------------------------------------------------- 605 Platform-specific Implementations 606 Try these, and if they don't work on your platform, then special case your 607 platform with new implementations. 608 ---------------------------------------------------------------------------*/ 609 610/* Generic time zone layer -------------------------------------------------- */ 611 612/* Time zone utilities */ 613U_CAPI void U_EXPORT2 614uprv_tzset() 615{ 616#if defined(U_TZSET) 617 U_TZSET(); 618#else 619 /* no initialization*/ 620#endif 621} 622 623U_CAPI int32_t U_EXPORT2 624uprv_timezone() 625{ 626#ifdef U_TIMEZONE 627 return U_TIMEZONE; 628#else 629 time_t t, t1, t2; 630 struct tm tmrec; 631 UBool dst_checked; 632 int32_t tdiff = 0; 633 634 time(&t); 635 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 636 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 637 t1 = mktime(&tmrec); /* local time in seconds*/ 638 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 639 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 640 tdiff = t2 - t1; 641 /* imitate NT behaviour, which returns same timezone offset to GMT for 642 winter and summer*/ 643 if (dst_checked) 644 tdiff += 3600; 645 return tdiff; 646#endif 647} 648 649/* Note that U_TZNAME does *not* have to be tzname, but if it is, 650 some platforms need to have it declared here. */ 651 652#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN)) 653/* RS6000 and others reject char **tzname. 
*/ 654extern U_IMPORT char *U_TZNAME[]; 655#endif 656 657#if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)) 658/* These platforms are likely to use Olson timezone IDs. */ 659#define CHECK_LOCALTIME_LINK 1 660#if defined(U_DARWIN) 661#include <tzfile.h> 662#define TZZONEINFO (TZDIR "/") 663#else 664#define TZDEFAULT "/etc/localtime" 665#define TZZONEINFO "/usr/share/zoneinfo/" 666#endif 667#if U_HAVE_DIRENT_H 668#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 669/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 670 symlinked to /etc/localtime, which makes searchForTZFile return 671 'localtime' when it's the first match. */ 672#define TZFILE_SKIP2 "localtime" 673#define SEARCH_TZFILE 674#include <dirent.h> /* Needed to search through system timezone files */ 675#endif 676static char gTimeZoneBuffer[PATH_MAX]; 677static char *gTimeZoneBufferPtr = NULL; 678#endif 679 680#ifndef U_WINDOWS 681#define isNonDigit(ch) (ch < '0' || '9' < ch) 682static UBool isValidOlsonID(const char *id) { 683 int32_t idx = 0; 684 685 /* Determine if this is something like Iceland (Olson ID) 686 or AST4ADT (non-Olson ID) */ 687 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 688 idx++; 689 } 690 691 /* If we went through the whole string, then it might be okay. 692 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 693 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 694 The rest of the time it could be an Olson ID. George */ 695 return (UBool)(id[idx] == 0 696 || uprv_strcmp(id, "PST8PDT") == 0 697 || uprv_strcmp(id, "MST7MDT") == 0 698 || uprv_strcmp(id, "CST6CDT") == 0 699 || uprv_strcmp(id, "EST5EDT") == 0); 700} 701 702/* On some Unix-like OS, 'posix' subdirectory in 703 /usr/share/zoneinfo replicates the top-level contents. 
'right' 704 subdirectory has the same set of files, but individual files 705 are different from those in the top-level directory or 'posix' 706 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 707 has files for UTC. 708 When the first match for /etc/localtime is in either of them 709 (usually in posix because 'right' has different file contents), 710 or TZ environment variable points to one of them, createTimeZone 711 fails because, say, 'posix/America/New_York' is not an Olson 712 timezone id ('America/New_York' is). So, we have to skip 713 'posix/' and 'right/' at the beginning. */ 714static void skipZoneIDPrefix(const char** id) { 715 if (uprv_strncmp(*id, "posix/", 6) == 0 716 || uprv_strncmp(*id, "right/", 6) == 0) 717 { 718 *id += 6; 719 } 720} 721#endif 722 723#if defined(U_TZNAME) && !defined(U_WINDOWS) 724 725#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 726typedef struct OffsetZoneMapping { 727 int32_t offsetSeconds; 728 int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/ 729 const char *stdID; 730 const char *dstID; 731 const char *olsonID; 732} OffsetZoneMapping; 733 734/* 735This list tries to disambiguate a set of abbreviated timezone IDs and offsets 736and maps it to an Olson ID. 737Before adding anything to this list, take a look at 738icu/source/tools/tzcode/tz.alias 739Sometimes no daylight savings (0) is important to define due to aliases. 740This list can be tested with icu/source/test/compat/tzone.pl 741More values could be added to daylightType to increase precision. 
742*/ 743static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 744 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 745 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 746 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 747 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 748 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 749 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 750 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 751 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 752 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 753 {-34200, 2, "CST", "CST", "Australia/South"}, 754 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 755 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 756 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 757 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 758 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 759 {-28800, 2, "WST", "WST", "Australia/West"}, 760 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 761 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 762 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 763 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 764 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 765 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 766 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 767 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 768 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 769 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 770 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 771 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 772 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 773 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 774 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 775 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 776 {0, 1, "GMT", "IST", "Europe/Dublin"}, 777 {0, 1, "GMT", "BST", "Europe/London"}, 778 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 779 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 780 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 781 {3600, 1, "EGT", "EGST", 
"America/Scoresbysund"}, 782 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 783 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 784 {10800, 1, "WGT", "WGST", "America/Godthab"}, 785 {10800, 2, "BRT", "BRST", "Brazil/East"}, 786 {12600, 1, "NST", "NDT", "America/St_Johns"}, 787 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 788 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 789 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 790 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 791 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 792 {18000, 1, "CST", "CDT", "America/Havana"}, 793 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 794 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 795 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 796 {21600, 0, "CST", "CDT", "America/Guatemala"}, 797 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 798 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 799 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 800 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 801 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 802 {36000, 1, "HAST", "HADT", "US/Aleutian"} 803}; 804 805/*#define DEBUG_TZNAME*/ 806 807static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 808{ 809 int32_t idx; 810#ifdef DEBUG_TZNAME 811 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 812#endif 813 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++) 814 { 815 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 816 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 817 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 818 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 819 { 820 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 821 } 822 } 823 return NULL; 824} 825#endif 826 827#ifdef 
SEARCH_TZFILE 828#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 829#define MAX_READ_SIZE 512 830 831typedef struct DefaultTZInfo { 832 char* defaultTZBuffer; 833 int64_t defaultTZFileSize; 834 FILE* defaultTZFilePtr; 835 UBool defaultTZstatus; 836 int32_t defaultTZPosition; 837} DefaultTZInfo; 838 839/* 840 * This method compares the two files given to see if they are a match. 841 * It is currently use to compare two TZ files. 842 */ 843static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 844 FILE* file; 845 int64_t sizeFile; 846 int64_t sizeFileLeft; 847 int32_t sizeFileRead; 848 int32_t sizeFileToRead; 849 char bufferFile[MAX_READ_SIZE]; 850 UBool result = TRUE; 851 852 if (tzInfo->defaultTZFilePtr == NULL) { 853 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 854 } 855 file = fopen(TZFileName, "r"); 856 857 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 858 859 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 860 /* First check that the file size are equal. */ 861 if (tzInfo->defaultTZFileSize == 0) { 862 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 863 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 864 } 865 fseek(file, 0, SEEK_END); 866 sizeFile = ftell(file); 867 sizeFileLeft = sizeFile; 868 869 if (sizeFile != tzInfo->defaultTZFileSize) { 870 result = FALSE; 871 } else { 872 /* Store the data from the files in seperate buffers and 873 * compare each byte to determine equality. 874 */ 875 if (tzInfo->defaultTZBuffer == NULL) { 876 rewind(tzInfo->defaultTZFilePtr); 877 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 878 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 879 } 880 rewind(file); 881 while(sizeFileLeft > 0) { 882 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 883 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? 
sizeFileLeft : MAX_READ_SIZE; 884 885 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 886 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 887 result = FALSE; 888 break; 889 } 890 sizeFileLeft -= sizeFileRead; 891 tzInfo->defaultTZPosition += sizeFileRead; 892 } 893 } 894 } else { 895 result = FALSE; 896 } 897 898 if (file != NULL) { 899 fclose(file); 900 } 901 902 return result; 903} 904/* 905 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 906 */ 907/* dirent also lists two entries: "." and ".." that we can safely ignore. */ 908#define SKIP1 "." 909#define SKIP2 ".." 910static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 911static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 912 char curpath[MAX_PATH_SIZE]; 913 DIR* dirp = opendir(path); 914 DIR* subDirp = NULL; 915 struct dirent* dirEntry = NULL; 916 917 char* result = NULL; 918 if (dirp == NULL) { 919 return result; 920 } 921 922 /* Save the current path */ 923 uprv_memset(curpath, 0, MAX_PATH_SIZE); 924 uprv_strcpy(curpath, path); 925 926 /* Check each entry in the directory. */ 927 while((dirEntry = readdir(dirp)) != NULL) { 928 const char* dirName = dirEntry->d_name; 929 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 930 /* Create a newpath with the new entry to test each entry in the directory. */ 931 char newpath[MAX_PATH_SIZE]; 932 uprv_strcpy(newpath, curpath); 933 uprv_strcat(newpath, dirName); 934 935 if ((subDirp = opendir(newpath)) != NULL) { 936 /* If this new path is a directory, make a recursive call with the newpath. */ 937 closedir(subDirp); 938 uprv_strcat(newpath, "/"); 939 result = searchForTZFile(newpath, tzInfo); 940 /* 941 Have to get out here. Otherwise, we'd keep looking 942 and return the first match in the top-level directory 943 if there's a match in the top-level. 
If not, this function 944 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 945 It worked without this in most cases because we have a fallback of calling 946 localtime_r to figure out the default timezone. 947 */ 948 if (result != NULL) 949 break; 950 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 951 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 952 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 953 skipZoneIDPrefix(&zoneid); 954 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 955 result = SEARCH_TZFILE_RESULT; 956 /* Get out after the first one found. */ 957 break; 958 } 959 } 960 } 961 } 962 closedir(dirp); 963 return result; 964} 965#endif 966U_CAPI const char* U_EXPORT2 967uprv_tzname(int n) 968{ 969 const char *tzid = NULL; 970#ifdef U_WINDOWS 971 tzid = uprv_detectWindowsTimeZone(); 972 973 if (tzid != NULL) { 974 return tzid; 975 } 976#else 977 978/*#if defined(U_DARWIN) 979 int ret; 980 981 tzid = getenv("TZFILE"); 982 if (tzid != NULL) { 983 return tzid; 984 } 985#endif*/ 986 987/* This code can be temporarily disabled to test tzname resolution later on. */ 988#ifndef DEBUG_TZNAME 989 tzid = getenv("TZ"); 990 if (tzid != NULL && isValidOlsonID(tzid)) 991 { 992 /* This might be a good Olson ID. */ 993 skipZoneIDPrefix(&tzid); 994 return tzid; 995 } 996 /* else U_TZNAME will give a better result. */ 997#endif 998 999#if defined(CHECK_LOCALTIME_LINK) 1000 /* Caller must handle threading issues */ 1001 if (gTimeZoneBufferPtr == NULL) { 1002 /* 1003 This is a trick to look at the name of the link to get the Olson ID 1004 because the tzfile contents is underspecified. 1005 This isn't guaranteed to work because it may not be a symlink. 
1006 */ 1007 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1008 if (0 < ret) { 1009 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1010 gTimeZoneBuffer[ret] = 0; 1011 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1012 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1013 { 1014 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1015 } 1016 } else { 1017#if defined(SEARCH_TZFILE) 1018 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1019 if (tzInfo != NULL) { 1020 tzInfo->defaultTZBuffer = NULL; 1021 tzInfo->defaultTZFileSize = 0; 1022 tzInfo->defaultTZFilePtr = NULL; 1023 tzInfo->defaultTZstatus = FALSE; 1024 tzInfo->defaultTZPosition = 0; 1025 1026 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1027 1028 /* Free previously allocated memory */ 1029 if (tzInfo->defaultTZBuffer != NULL) { 1030 uprv_free(tzInfo->defaultTZBuffer); 1031 } 1032 if (tzInfo->defaultTZFilePtr != NULL) { 1033 fclose(tzInfo->defaultTZFilePtr); 1034 } 1035 uprv_free(tzInfo); 1036 } 1037 1038 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1039 return gTimeZoneBufferPtr; 1040 } 1041#endif 1042 } 1043 } 1044 else { 1045 return gTimeZoneBufferPtr; 1046 } 1047#endif 1048#endif 1049 1050#ifdef U_TZNAME 1051#if defined(U_WINDOWS) || defined(U_MINGW) 1052 /* The return value is free'd in timezone.cpp on Windows because 1053 * the other code path returns a pointer to a heap location. */ 1054 return uprv_strdup(U_TZNAME[n]); 1055#else 1056 /* 1057 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1058 So we remap the abbreviation to an olson ID. 1059 1060 Since Windows exposes a little more timezone information, 1061 we normally don't use this code on Windows because 1062 uprv_detectWindowsTimeZone should have already given the correct answer. 
1063 */ 1064 { 1065 struct tm juneSol, decemberSol; 1066 int daylightType; 1067 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1068 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1069 1070 /* This probing will tell us when daylight savings occurs. */ 1071 localtime_r(&juneSolstice, &juneSol); 1072 localtime_r(&decemberSolstice, &decemberSol); 1073 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0); 1074 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1075 if (tzid != NULL) { 1076 return tzid; 1077 } 1078 } 1079 return U_TZNAME[n]; 1080#endif 1081#else 1082 return ""; 1083#endif 1084} 1085 1086/* Get and set the ICU data directory --------------------------------------- */ 1087 1088static char *gDataDirectory = NULL; 1089#if U_POSIX_LOCALE 1090 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1091#endif 1092 1093static UBool U_CALLCONV putil_cleanup(void) 1094{ 1095 if (gDataDirectory && *gDataDirectory) { 1096 uprv_free(gDataDirectory); 1097 } 1098 gDataDirectory = NULL; 1099#if U_POSIX_LOCALE 1100 if (gCorrectedPOSIXLocale) { 1101 uprv_free(gCorrectedPOSIXLocale); 1102 gCorrectedPOSIXLocale = NULL; 1103 } 1104#endif 1105 return TRUE; 1106} 1107 1108/* 1109 * Set the data directory. 1110 * Make a copy of the passed string, and set the global data dir to point to it. 1111 * TODO: see bug #2849, regarding thread safety. 1112 */ 1113U_CAPI void U_EXPORT2 1114u_setDataDirectory(const char *directory) { 1115 char *newDataDir; 1116 int32_t length; 1117 1118 if(directory==NULL || *directory==0) { 1119 /* A small optimization to prevent the malloc and copy when the 1120 shared library is used, and this is a way to make sure that NULL 1121 is never returned. 
1122 */ 1123 newDataDir = (char *)""; 1124 } 1125 else { 1126 length=(int32_t)uprv_strlen(directory); 1127 newDataDir = (char *)uprv_malloc(length + 2); 1128 /* Exit out if newDataDir could not be created. */ 1129 if (newDataDir == NULL) { 1130 return; 1131 } 1132 uprv_strcpy(newDataDir, directory); 1133 1134#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1135 { 1136 char *p; 1137 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1138 *p = U_FILE_SEP_CHAR; 1139 } 1140 } 1141#endif 1142 } 1143 1144 umtx_lock(NULL); 1145 if (gDataDirectory && *gDataDirectory) { 1146 uprv_free(gDataDirectory); 1147 } 1148 gDataDirectory = newDataDir; 1149 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1150 umtx_unlock(NULL); 1151} 1152 1153U_CAPI UBool U_EXPORT2 1154uprv_pathIsAbsolute(const char *path) 1155{ 1156 if(!path || !*path) { 1157 return FALSE; 1158 } 1159 1160 if(*path == U_FILE_SEP_CHAR) { 1161 return TRUE; 1162 } 1163 1164#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1165 if(*path == U_FILE_ALT_SEP_CHAR) { 1166 return TRUE; 1167 } 1168#endif 1169 1170#if defined(U_WINDOWS) 1171 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1172 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1173 path[1] == ':' ) { 1174 return TRUE; 1175 } 1176#endif 1177 1178 return FALSE; 1179} 1180 1181/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1182 until some client wrapper makefiles are updated */ 1183#if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR 1184# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1185# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1186# endif 1187#endif 1188 1189U_CAPI const char * U_EXPORT2 1190u_getDataDirectory(void) { 1191 const char *path = NULL; 1192#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1193 char datadir_path_buffer[PATH_MAX]; 1194#endif 1195 1196 /* if we have the directory, then return it immediately */ 1197 UMTX_CHECK(NULL, gDataDirectory, path); 1198 1199 if(path) { 1200 return path; 1201 } 1202 1203 /* 1204 When 
ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1205 override ICU's data with the ICU_DATA environment variable. This prevents 1206 problems where multiple custom copies of ICU's specific version of data 1207 are installed on a system. Either the application must define the data 1208 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1209 ICU, set the data with udata_setCommonData or trust that all of the 1210 required data is contained in ICU's data library that contains 1211 the entry point defined by U_ICUDATA_ENTRY_POINT. 1212 1213 There may also be some platforms where environment variables 1214 are not allowed. 1215 */ 1216# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1217 /* First try to get the environment variable */ 1218 path=getenv("ICU_DATA"); 1219# endif 1220 1221 /* ICU_DATA_DIR may be set as a compile option. 1222 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1223 * and is used only when data is built in archive mode eliminating the need 1224 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1225 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1226 * set their own path. 1227 */ 1228#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1229 if(path==NULL || *path==0) { 1230# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1231 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1232# endif 1233# ifdef ICU_DATA_DIR 1234 path=ICU_DATA_DIR; 1235# else 1236 path=U_ICU_DATA_DEFAULT_DIR; 1237# endif 1238# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1239 if (prefix != NULL) { 1240 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1241 path=datadir_path_buffer; 1242 } 1243# endif 1244 } 1245#endif 1246 1247 if(path==NULL) { 1248 /* It looks really bad, set it to something. 
*/ 1249 path = ""; 1250 } 1251 1252 u_setDataDirectory(path); 1253 return gDataDirectory; 1254} 1255 1256 1257 1258 1259 1260/* Macintosh-specific locale information ------------------------------------ */ 1261#ifdef XP_MAC 1262 1263typedef struct { 1264 int32_t script; 1265 int32_t region; 1266 int32_t lang; 1267 int32_t date_region; 1268 const char* posixID; 1269} mac_lc_rec; 1270 1271/* Todo: This will be updated with a newer version from www.unicode.org web 1272 page when it's available.*/ 1273#define MAC_LC_MAGIC_NUMBER -5 1274#define MAC_LC_INIT_NUMBER -9 1275 1276static const mac_lc_rec mac_lc_recs[] = { 1277 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1278 /* United States*/ 1279 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1280 /* France*/ 1281 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1282 /* Great Britain*/ 1283 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1284 /* Germany*/ 1285 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1286 /* Italy*/ 1287 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1288 /* Metherlands*/ 1289 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1290 /* French for Belgium or Lxembourg*/ 1291 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1292 /* Sweden*/ 1293 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1294 /* Denmark*/ 1295 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1296 /* Portugal*/ 1297 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1298 /* French Canada*/ 1299 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1300 /* Israel*/ 1301 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1302 /* Japan*/ 1303 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 
MAC_LC_MAGIC_NUMBER, 15, "en_AU",
    /* Australia*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
    /* the Arabic world (?)*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
    /* Finland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
    /* French for Switzerland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
    /* German for Switzerland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
    /* Greece*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
    /* Turkey ===*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
    /* Lithuania*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
    /* Poland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
    /* Hungary*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
    /* Estonia*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland [Ask Rich for the data.
HS]*/ 1341 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1342 /* Faeroe Islands*/ 1343 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1344 /* Iran*/ 1345 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1346 /* Russia*/ 1347 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1348 /* Ireland*/ 1349 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1350 /* Korea*/ 1351 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1352 /* People's Republic of China*/ 1353 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1354 /* Taiwan*/ 1355 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1356 /* Thailand*/ 1357 1358 /* fallback is en_US*/ 1359 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1360 MAC_LC_MAGIC_NUMBER, "en_US" 1361}; 1362 1363#endif 1364 1365#if U_POSIX_LOCALE 1366/* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1367 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1368 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1369 */ 1370static const char *uprv_getPOSIXIDForCategory(int category) 1371{ 1372 const char* posixID = NULL; 1373 if (category == LC_MESSAGES || category == LC_CTYPE) { 1374 /* 1375 * On Solaris two different calls to setlocale can result in 1376 * different values. Only get this value once. 1377 * 1378 * We must check this first because an application can set this. 1379 * 1380 * LC_ALL can't be used because it's platform dependent. The LANG 1381 * environment variable seems to affect LC_CTYPE variable by default. 1382 * Here is what setlocale(LC_ALL, NULL) can return. 1383 * HPUX can return 'C C C C C C C' 1384 * Solaris can return /en_US/C/C/C/C/C on the second try. 1385 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 
1386 * 1387 * The default codepage detection also needs to use LC_CTYPE. 1388 * 1389 * Do not call setlocale(LC_*, "")! Using an empty string instead 1390 * of NULL, will modify the libc behavior. 1391 */ 1392 posixID = setlocale(category, NULL); 1393 if ((posixID == 0) 1394 || (uprv_strcmp("C", posixID) == 0) 1395 || (uprv_strcmp("POSIX", posixID) == 0)) 1396 { 1397 /* Maybe we got some garbage. Try something more reasonable */ 1398 posixID = getenv("LC_ALL"); 1399 if (posixID == 0) { 1400 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1401 if (posixID == 0) { 1402 posixID = getenv("LANG"); 1403 } 1404 } 1405 } 1406 } 1407 if ((posixID==0) 1408 || (uprv_strcmp("C", posixID) == 0) 1409 || (uprv_strcmp("POSIX", posixID) == 0)) 1410 { 1411 /* Nothing worked. Give it a nice POSIX default value. */ 1412 posixID = "en_US_POSIX"; 1413 } 1414 return posixID; 1415} 1416 1417/* Return just the POSIX id for the default locale, whatever happens to be in 1418 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1419 */ 1420static const char *uprv_getPOSIXIDForDefaultLocale(void) 1421{ 1422 static const char* posixID = NULL; 1423 if (posixID == 0) { 1424 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1425 } 1426 return posixID; 1427} 1428 1429/* Return just the POSIX id for the default codepage, whatever happens to be in 1430 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1431 */ 1432static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1433{ 1434 static const char* posixID = NULL; 1435 if (posixID == 0) { 1436 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1437 } 1438 return posixID; 1439} 1440#endif 1441 1442/* NOTE: The caller should handle thread safety */ 1443U_CAPI const char* U_EXPORT2 1444uprv_getDefaultLocaleID() 1445{ 1446#if U_POSIX_LOCALE 1447/* 1448 Note that: (a '!' 
means the ID is improper somehow) 1449 LC_ALL ----> default_loc codepage 1450-------------------------------------------------------- 1451 ab.CD ab CD 1452 ab@CD ab__CD - 1453 ab@CD.EF ab__CD EF 1454 1455 ab_CD.EF@GH ab_CD_GH EF 1456 1457Some 'improper' ways to do the same as above: 1458 ! ab_CD@GH.EF ab_CD_GH EF 1459 ! ab_CD.EF@GH.IJ ab_CD_GH EF 1460 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF 1461 1462 _CD@GH _CD_GH - 1463 _CD.EF@GH _CD_GH EF 1464 1465The variant cannot have dots in it. 1466The 'rightmost' variant (@xxx) wins. 1467The leftmost codepage (.xxx) wins. 1468*/ 1469 char *correctedPOSIXLocale = 0; 1470 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1471 const char *p; 1472 const char *q; 1473 int32_t len; 1474 1475 /* Format: (no spaces) 1476 ll [ _CC ] [ . MM ] [ @ VV] 1477 1478 l = lang, C = ctry, M = charmap, V = variant 1479 */ 1480 1481 if (gCorrectedPOSIXLocale != NULL) { 1482 return gCorrectedPOSIXLocale; 1483 } 1484 1485 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1486 /* assume new locale can't be larger than old one? */ 1487 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1488 /* Exit on memory allocation error. */ 1489 if (correctedPOSIXLocale == NULL) { 1490 return NULL; 1491 } 1492 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1493 correctedPOSIXLocale[p-posixID] = 0; 1494 1495 /* do not copy after the @ */ 1496 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1497 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1498 } 1499 } 1500 1501 /* Note that we scan the *uncorrected* ID. */ 1502 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1503 if (correctedPOSIXLocale == NULL) { 1504 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1505 /* Exit on memory allocation error. 
*/ 1506 if (correctedPOSIXLocale == NULL) { 1507 return NULL; 1508 } 1509 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1510 correctedPOSIXLocale[p-posixID] = 0; 1511 } 1512 p++; 1513 1514 /* Take care of any special cases here.. */ 1515 if (!uprv_strcmp(p, "nynorsk")) { 1516 p = "NY"; 1517 /* Don't worry about no__NY. In practice, it won't appear. */ 1518 } 1519 1520 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1521 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1522 } 1523 else { 1524 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1525 } 1526 1527 if ((q = uprv_strchr(p, '.')) != NULL) { 1528 /* How big will the resulting string be? */ 1529 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1530 uprv_strncat(correctedPOSIXLocale, p, q-p); 1531 correctedPOSIXLocale[len] = 0; 1532 } 1533 else { 1534 /* Anything following the @ sign */ 1535 uprv_strcat(correctedPOSIXLocale, p); 1536 } 1537 1538 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1539 * How about 'russian' -> 'ru'? 1540 * Many of the other locales using ISO codes will be handled by the 1541 * canonicalization functions in uloc_getDefault. 1542 */ 1543 } 1544 1545 /* Was a correction made? */ 1546 if (correctedPOSIXLocale != NULL) { 1547 posixID = correctedPOSIXLocale; 1548 } 1549 else { 1550 /* copy it, just in case the original pointer goes away. See j2395 */ 1551 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1552 /* Exit on memory allocation error. */ 1553 if (correctedPOSIXLocale == NULL) { 1554 return NULL; 1555 } 1556 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1557 } 1558 1559 if (gCorrectedPOSIXLocale == NULL) { 1560 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1561 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1562 correctedPOSIXLocale = NULL; 1563 } 1564 1565 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. 
*/ 1566 uprv_free(correctedPOSIXLocale); 1567 } 1568 1569 return posixID; 1570 1571#elif defined(U_WINDOWS) || defined(U_MINGW) 1572 UErrorCode status = U_ZERO_ERROR; 1573 LCID id = GetThreadLocale(); 1574 const char* locID = uprv_convertToPosix(id, &status); 1575 1576 if (U_FAILURE(status)) { 1577 locID = "en_US"; 1578 } 1579 return locID; 1580 1581#elif defined(XP_MAC) 1582 int32_t script = MAC_LC_INIT_NUMBER; 1583 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1584 int32_t region = MAC_LC_INIT_NUMBER; 1585 /* = GetScriptManagerVariable(smRegionCode);*/ 1586 int32_t lang = MAC_LC_INIT_NUMBER; 1587 /* = GetScriptManagerVariable(smScriptLang);*/ 1588 int32_t date_region = MAC_LC_INIT_NUMBER; 1589 const char* posixID = 0; 1590 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1591 int32_t i; 1592 Intl1Hndl ih; 1593 1594 ih = (Intl1Hndl) GetIntlResource(1); 1595 if (ih) 1596 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1597 1598 for (i = 0; i < count; i++) { 1599 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1600 || (mac_lc_recs[i].script == script)) 1601 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1602 || (mac_lc_recs[i].region == region)) 1603 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1604 || (mac_lc_recs[i].lang == lang)) 1605 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1606 || (mac_lc_recs[i].date_region == date_region)) 1607 ) 1608 { 1609 posixID = mac_lc_recs[i].posixID; 1610 break; 1611 } 1612 } 1613 1614 return posixID; 1615 1616#elif defined(OS400) 1617 /* locales are process scoped and are by definition thread safe */ 1618 static char correctedLocale[64]; 1619 const char *localeID = getenv("LC_ALL"); 1620 char *p; 1621 1622 if (localeID == NULL) 1623 localeID = getenv("LANG"); 1624 if (localeID == NULL) 1625 localeID = setlocale(LC_ALL, NULL); 1626 /* Make sure we have something... */ 1627 if (localeID == NULL) 1628 return "en_US_POSIX"; 1629 1630 /* Extract the locale name from the path. 
*/ 1631 if((p = uprv_strrchr(localeID, '/')) != NULL) 1632 { 1633 /* Increment p to start of locale name. */ 1634 p++; 1635 localeID = p; 1636 } 1637 1638 /* Copy to work location. */ 1639 uprv_strcpy(correctedLocale, localeID); 1640 1641 /* Strip off the '.locale' extension. */ 1642 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1643 *p = 0; 1644 } 1645 1646 /* Upper case the locale name. */ 1647 T_CString_toUpperCase(correctedLocale); 1648 1649 /* See if we are using the POSIX locale. Any of the 1650 * following are equivalent and use the same QLGPGCMA 1651 * (POSIX) locale. 1652 * QLGPGCMA2 means UCS2 1653 * QLGPGCMA_4 means UTF-32 1654 * QLGPGCMA_8 means UTF-8 1655 */ 1656 if ((uprv_strcmp("C", correctedLocale) == 0) || 1657 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1658 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1659 { 1660 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1661 } 1662 else 1663 { 1664 int16_t LocaleLen; 1665 1666 /* Lower case the lang portion. */ 1667 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1668 { 1669 *p = uprv_tolower(*p); 1670 } 1671 1672 /* Adjust for Euro. After '_E' add 'URO'. */ 1673 LocaleLen = uprv_strlen(correctedLocale); 1674 if (correctedLocale[LocaleLen - 2] == '_' && 1675 correctedLocale[LocaleLen - 1] == 'E') 1676 { 1677 uprv_strcat(correctedLocale, "URO"); 1678 } 1679 1680 /* If using Lotus-based locale then convert to 1681 * equivalent non Lotus. 1682 */ 1683 else if (correctedLocale[LocaleLen - 2] == '_' && 1684 correctedLocale[LocaleLen - 1] == 'L') 1685 { 1686 correctedLocale[LocaleLen - 2] = 0; 1687 } 1688 1689 /* There are separate simplified and traditional 1690 * locales called zh_HK_S and zh_HK_T. 1691 */ 1692 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1693 { 1694 uprv_strcpy(correctedLocale, "zh_HK"); 1695 } 1696 1697 /* A special zh_CN_GBK locale... 
1698 */ 1699 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1700 { 1701 uprv_strcpy(correctedLocale, "zh_CN"); 1702 } 1703 1704 } 1705 1706 return correctedLocale; 1707#endif 1708 1709} 1710 1711#if !U_CHARSET_IS_UTF8 1712#if U_POSIX_LOCALE 1713/* 1714Due to various platform differences, one platform may specify a charset, 1715when they really mean a different charset. Remap the names so that they are 1716compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1717here. Before adding anything to this function, please consider adding unique 1718names to the ICU alias table in the data directory. 1719*/ 1720static const char* 1721remapPlatformDependentCodepage(const char *locale, const char *name) { 1722 if (locale != NULL && *locale == 0) { 1723 /* Make sure that an empty locale is handled the same way. */ 1724 locale = NULL; 1725 } 1726 if (name == NULL) { 1727 return NULL; 1728 } 1729#if defined(U_AIX) 1730 if (uprv_strcmp(name, "IBM-943") == 0) { 1731 /* Use the ASCII compatible ibm-943 */ 1732 name = "Shift-JIS"; 1733 } 1734 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1735 /* Use the windows-1252 that contains the Euro */ 1736 name = "IBM-5348"; 1737 } 1738#elif defined(U_SOLARIS) 1739 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1740 /* Solaris underspecifies the "EUC" name. */ 1741 if (uprv_strcmp(locale, "zh_CN") == 0) { 1742 name = "EUC-CN"; 1743 } 1744 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1745 name = "EUC-TW"; 1746 } 1747 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1748 name = "EUC-KR"; 1749 } 1750 } 1751 else if (uprv_strcmp(name, "eucJP") == 0) { 1752 /* 1753 ibm-954 is the best match. 1754 ibm-33722 is the default for eucJP (similar to Windows). 1755 */ 1756 name = "eucjis"; 1757 } 1758 else if (uprv_strcmp(name, "646") == 0) { 1759 /* 1760 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1761 * ISO-8859-1 instead of US-ASCII(646). 
1762 */ 1763 name = "ISO-8859-1"; 1764 } 1765#elif defined(U_DARWIN) 1766 if (locale == NULL && *name == 0) { 1767 /* 1768 No locale was specified, and an empty name was passed in. 1769 This usually indicates that nl_langinfo didn't return valid information. 1770 Mac OS X uses UTF-8 by default (especially the locale data and console). 1771 */ 1772 name = "UTF-8"; 1773 } 1774 else if (uprv_strcmp(name, "CP949") == 0) { 1775 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1776 name = "EUC-KR"; 1777 } 1778 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1779 /* 1780 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1781 */ 1782 name = "UTF-8"; 1783 } 1784#elif defined(U_BSD) 1785 if (uprv_strcmp(name, "CP949") == 0) { 1786 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1787 name = "EUC-KR"; 1788 } 1789#elif defined(U_HPUX) 1790 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1791 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1792 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1793 name = "hkbig5"; 1794 } 1795 else if (uprv_strcmp(name, "eucJP") == 0) { 1796 /* 1797 ibm-1350 is the best match, but unavailable. 1798 ibm-954 is mostly a superset of ibm-1350. 1799 ibm-33722 is the default for eucJP (similar to Windows). 1800 */ 1801 name = "eucjis"; 1802 } 1803#elif defined(U_LINUX) 1804 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1805 /* Linux underspecifies the "EUC" name. */ 1806 if (uprv_strcmp(locale, "korean") == 0) { 1807 name = "EUC-KR"; 1808 } 1809 else if (uprv_strcmp(locale, "japanese") == 0) { 1810 /* See comment below about eucJP */ 1811 name = "eucjis"; 1812 } 1813 } 1814 else if (uprv_strcmp(name, "eucjp") == 0) { 1815 /* 1816 ibm-1350 is the best match, but unavailable. 
1817 ibm-954 is mostly a superset of ibm-1350. 1818 ibm-33722 is the default for eucJP (similar to Windows). 1819 */ 1820 name = "eucjis"; 1821 } 1822 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1823 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1824 /* 1825 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1826 */ 1827 name = "UTF-8"; 1828 } 1829 /* 1830 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1831 * it by falling back to 'US-ASCII' when NULL is returned from this 1832 * function. So, we don't have to worry about it here. 1833 */ 1834#endif 1835 /* return NULL when "" is passed in */ 1836 if (*name == 0) { 1837 name = NULL; 1838 } 1839 return name; 1840} 1841 1842static const char* 1843getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1844{ 1845 char localeBuf[100]; 1846 const char *name = NULL; 1847 char *variant = NULL; 1848 1849 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1850 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1851 uprv_strncpy(localeBuf, localeName, localeCapacity); 1852 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1853 name = uprv_strncpy(buffer, name+1, buffCapacity); 1854 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1855 if ((variant = (uprv_strchr(name, '@'))) != NULL) { 1856 *variant = 0; 1857 } 1858 name = remapPlatformDependentCodepage(localeBuf, name); 1859 } 1860 return name; 1861} 1862#endif 1863 1864static const char* 1865int_getDefaultCodepage() 1866{ 1867#if defined(OS400) 1868 uint32_t ccsid = 37; /* Default to ibm-37 */ 1869 static char codepage[64]; 1870 Qwc_JOBI0400_t jobinfo; 1871 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1872 1873 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1874 "* ", " ", &error); 1875 1876 if (error.Bytes_Available == 0) { 1877 if 
(jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1878 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1879 } 1880 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1881 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1882 } 1883 /* else use the default */ 1884 } 1885 sprintf(codepage,"ibm-%d", ccsid); 1886 return codepage; 1887 1888#elif defined(OS390) 1889 static char codepage[64]; 1890 1891 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1892 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1893 codepage[63] = 0; /* NULL terminate */ 1894 1895 return codepage; 1896 1897#elif defined(XP_MAC) 1898 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1899 1900#elif defined(U_WINDOWS) 1901 static char codepage[64]; 1902 sprintf(codepage, "windows-%d", GetACP()); 1903 return codepage; 1904 1905#elif U_POSIX_LOCALE 1906 static char codesetName[100]; 1907 const char *localeName = NULL; 1908 const char *name = NULL; 1909 1910 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1911 uprv_memset(codesetName, 0, sizeof(codesetName)); 1912#if U_HAVE_NL_LANGINFO_CODESET 1913 /* When available, check nl_langinfo first because it usually gives more 1914 useful names. It depends on LC_CTYPE. 1915 nl_langinfo may use the same buffer as setlocale. */ 1916 { 1917 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1918#if defined(U_DARWIN) || defined(U_LINUX) 1919 /* 1920 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1921 * instead of ASCII. 
1922 */ 1923 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1924 codeset = remapPlatformDependentCodepage(localeName, codeset); 1925 } else 1926#endif 1927 { 1928 codeset = remapPlatformDependentCodepage(NULL, codeset); 1929 } 1930 1931 if (codeset != NULL) { 1932 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1933 codesetName[sizeof(codesetName)-1] = 0; 1934 return codesetName; 1935 } 1936 } 1937#endif 1938 1939 /* Use setlocale in a nice way, and then check some environment variables. 1940 Maybe the application used setlocale already. 1941 */ 1942 uprv_memset(codesetName, 0, sizeof(codesetName)); 1943 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1944 if (name) { 1945 /* if we can find the codeset name from setlocale, return that. */ 1946 return name; 1947 } 1948 1949 if (*codesetName == 0) 1950 { 1951 /* Everything failed. Return US ASCII (ISO 646). */ 1952 (void)uprv_strcpy(codesetName, "US-ASCII"); 1953 } 1954 return codesetName; 1955#else 1956 return "US-ASCII"; 1957#endif 1958} 1959 1960 1961U_CAPI const char* U_EXPORT2 1962uprv_getDefaultCodepage() 1963{ 1964 static char const *name = NULL; 1965 umtx_lock(NULL); 1966 if (name == NULL) { 1967 name = int_getDefaultCodepage(); 1968 } 1969 umtx_unlock(NULL); 1970 return name; 1971} 1972#endif /* !U_CHARSET_IS_UTF8 */ 1973 1974 1975/* end of platform-specific implementation -------------- */ 1976 1977/* version handling --------------------------------------------------------- */ 1978 1979U_CAPI void U_EXPORT2 1980u_versionFromString(UVersionInfo versionArray, const char *versionString) { 1981 char *end; 1982 uint16_t part=0; 1983 1984 if(versionArray==NULL) { 1985 return; 1986 } 1987 1988 if(versionString!=NULL) { 1989 for(;;) { 1990 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 1991 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 1992 break; 1993 } 1994 versionString=end+1; 1995 } 1996 } 1997 1998 
while(part<U_MAX_VERSION_LENGTH) { 1999 versionArray[part++]=0; 2000 } 2001} 2002 2003U_CAPI void U_EXPORT2 2004u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2005 if(versionArray!=NULL && versionString!=NULL) { 2006 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2007 int32_t len = u_strlen(versionString); 2008 if(len>U_MAX_VERSION_STRING_LENGTH) { 2009 len = U_MAX_VERSION_STRING_LENGTH; 2010 } 2011 u_UCharsToChars(versionString, versionChars, len); 2012 versionChars[len]=0; 2013 u_versionFromString(versionArray, versionChars); 2014 } 2015} 2016 2017U_CAPI void U_EXPORT2 2018u_versionToString(UVersionInfo versionArray, char *versionString) { 2019 uint16_t count, part; 2020 uint8_t field; 2021 2022 if(versionString==NULL) { 2023 return; 2024 } 2025 2026 if(versionArray==NULL) { 2027 versionString[0]=0; 2028 return; 2029 } 2030 2031 /* count how many fields need to be written */ 2032 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2033 } 2034 2035 if(count <= 1) { 2036 count = 2; 2037 } 2038 2039 /* write the first part */ 2040 /* write the decimal field value */ 2041 field=versionArray[0]; 2042 if(field>=100) { 2043 *versionString++=(char)('0'+field/100); 2044 field%=100; 2045 } 2046 if(field>=10) { 2047 *versionString++=(char)('0'+field/10); 2048 field%=10; 2049 } 2050 *versionString++=(char)('0'+field); 2051 2052 /* write the following parts */ 2053 for(part=1; part<count; ++part) { 2054 /* write a dot first */ 2055 *versionString++=U_VERSION_DELIMITER; 2056 2057 /* write the decimal field value */ 2058 field=versionArray[part]; 2059 if(field>=100) { 2060 *versionString++=(char)('0'+field/100); 2061 field%=100; 2062 } 2063 if(field>=10) { 2064 *versionString++=(char)('0'+field/10); 2065 field%=10; 2066 } 2067 *versionString++=(char)('0'+field); 2068 } 2069 2070 /* NUL-terminate */ 2071 *versionString=0; 2072} 2073 2074U_CAPI void U_EXPORT2 2075u_getVersion(UVersionInfo versionArray) { 2076 
u_versionFromString(versionArray, U_ICU_VERSION); 2077} 2078 2079/** 2080 * icucfg.h dependent code 2081 */ 2082 2083#if U_ENABLE_DYLOAD 2084 2085#if defined(U_CHECK_DYLOAD) 2086 2087#if defined(HAVE_DLOPEN) 2088 2089#ifdef HAVE_DLFCN_H 2090#ifdef __MVS__ 2091#ifndef __SUSV3 2092#define __SUSV3 1 2093#endif 2094#endif 2095#include <dlfcn.h> 2096#endif 2097 2098U_INTERNAL void * U_EXPORT2 2099uprv_dl_open(const char *libName, UErrorCode *status) { 2100 void *ret = NULL; 2101 if(U_FAILURE(*status)) return ret; 2102 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2103 if(ret==NULL) { 2104#ifndef U_TRACE_DYLOAD 2105 perror("dlopen"); 2106#endif 2107 *status = U_MISSING_RESOURCE_ERROR; 2108 } 2109 return ret; 2110} 2111 2112U_INTERNAL void U_EXPORT2 2113uprv_dl_close(void *lib, UErrorCode *status) { 2114 if(U_FAILURE(*status)) return; 2115 dlclose(lib); 2116} 2117 2118U_INTERNAL UVoidFunction* U_EXPORT2 2119uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2120 union { 2121 void* voidPtr; 2122 UVoidFunction* voidFunc; 2123 } ret; 2124 ret.voidPtr = NULL; 2125 if(U_FAILURE(*status)) return NULL; 2126 /* 2127 * ISO forbids the following cast, but it's needed for dlsym. 2128 * See: http://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html 2129 * See: http://www.trilithium.com/johan/2004/12/problem-with-dlsym/ 2130 */ 2131 ret.voidPtr = dlsym(lib, sym); 2132 if(ret.voidPtr == NULL) { 2133 *status = U_MISSING_RESOURCE_ERROR; 2134 } 2135 return ret.voidFunc; 2136} 2137 2138#else 2139 2140/* null (nonexistent) implementation. 
*/ 2141 2142U_INTERNAL void * U_EXPORT2 2143uprv_dl_open(const char *libName, UErrorCode *status) { 2144 if(U_FAILURE(*status)) return NULL; 2145 *status = U_UNSUPPORTED_ERROR; 2146 return NULL; 2147} 2148 2149U_INTERNAL void U_EXPORT2 2150uprv_dl_close(void *lib, UErrorCode *status) { 2151 if(U_FAILURE(*status)) return; 2152 *status = U_UNSUPPORTED_ERROR; 2153 return; 2154} 2155 2156 2157U_INTERNAL UVoidFunction* U_EXPORT2 2158uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2159 if(U_SUCCESS(*status)) { 2160 *status = U_UNSUPPORTED_ERROR; 2161 } 2162 return (UVoidFunction*)NULL; 2163} 2164 2165 2166 2167#endif 2168 2169#elif defined U_WINDOWS 2170 2171U_INTERNAL void * U_EXPORT2 2172uprv_dl_open(const char *libName, UErrorCode *status) { 2173 HMODULE lib = NULL; 2174 2175 if(U_FAILURE(*status)) return NULL; 2176 2177 lib = LoadLibraryA(libName); 2178 2179 if(lib==NULL) { 2180 *status = U_MISSING_RESOURCE_ERROR; 2181 } 2182 2183 return (void*)lib; 2184} 2185 2186U_INTERNAL void U_EXPORT2 2187uprv_dl_close(void *lib, UErrorCode *status) { 2188 HMODULE handle = (HMODULE)lib; 2189 if(U_FAILURE(*status)) return; 2190 2191 FreeLibrary(handle); 2192 2193 return; 2194} 2195 2196 2197U_INTERNAL UVoidFunction* U_EXPORT2 2198uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2199 HMODULE handle = (HMODULE)lib; 2200 UVoidFunction* addr = NULL; 2201 2202 if(U_FAILURE(*status) || lib==NULL) return NULL; 2203 2204 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2205 2206 if(addr==NULL) { 2207 DWORD lastError = GetLastError(); 2208 if(lastError == ERROR_PROC_NOT_FOUND) { 2209 *status = U_MISSING_RESOURCE_ERROR; 2210 } else { 2211 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2212 } 2213 } 2214 2215 return addr; 2216} 2217 2218 2219#else 2220 2221/* No dynamic loading set. 
*/ 2222 2223U_INTERNAL void * U_EXPORT2 2224uprv_dl_open(const char *libName, UErrorCode *status) { 2225 if(U_FAILURE(*status)) return NULL; 2226 *status = U_UNSUPPORTED_ERROR; 2227 return NULL; 2228} 2229 2230U_INTERNAL void U_EXPORT2 2231uprv_dl_close(void *lib, UErrorCode *status) { 2232 if(U_FAILURE(*status)) return; 2233 *status = U_UNSUPPORTED_ERROR; 2234 return; 2235} 2236 2237 2238U_INTERNAL UVoidFunction* U_EXPORT2 2239uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2240 if(U_SUCCESS(*status)) { 2241 *status = U_UNSUPPORTED_ERROR; 2242 } 2243 return (UVoidFunction*)NULL; 2244} 2245 2246 2247#endif 2248 2249#endif /* U_ENABLE_DYLOAD */ 2250 2251/* 2252 * Hey, Emacs, please set the following: 2253 * 2254 * Local Variables: 2255 * indent-tabs-mode: nil 2256 * End: 2257 * 2258 */ 2259