1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4****************************************************************************** 5* 6* Copyright (C) 1997-2016, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9****************************************************************************** 10* 11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 12* 13* Date Name Description 14* 04/14/97 aliu Creation. 15* 04/24/97 aliu Added getDefaultDataDirectory() and 16* getDefaultLocaleID(). 17* 04/28/97 aliu Rewritten to assume Unix and apply general methods 18* for assumed case. Non-UNIX platforms must be 19* special-cased. Rewrote numeric methods dealing 20* with NaN and Infinity to be platform independent 21* over all IEEE 754 platforms. 22* 05/13/97 aliu Restored sign of timezone 23* (semantics are hours West of GMT) 24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 25* nextDouble.. 26* 07/22/98 stephen Added remainder, max, min, trunc 27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 28* 08/24/98 stephen Added longBitsFromDouble 29* 09/08/98 stephen Minor changes for Mac Port 30* 03/02/99 stephen Removed openFile(). Added AS400 support. 31* Fixed EBCDIC tables 32* 04/15/99 stephen Converted to C. 33* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 34* 08/04/99 jeffrey R. Added OS/2 changes 35* 11/15/99 helena Integrated S/390 IEEE support. 36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 38* 01/03/08 Steven L. Fake Time Support 39****************************************************************************** 40*/ 41 42// Defines _XOPEN_SOURCE for access to POSIX functions. 43// Must be before any other #includes. 44#include "uposixdefs.h" 45 46/* include ICU headers */ 47#include "unicode/utypes.h" 48#include "unicode/putil.h" 49#include "unicode/ustring.h" 50#include "putilimp.h" 51#include "uassert.h" 52#include "umutex.h" 53#include "cmemory.h" 54#include "cstring.h" 55#include "locmap.h" 56#include "ucln_cmn.h" 57#include "charstr.h" 58 59/* Include standard headers. */ 60#include <stdio.h> 61#include <stdlib.h> 62#include <string.h> 63#include <math.h> 64#include <locale.h> 65#include <float.h> 66 67#ifndef U_COMMON_IMPLEMENTATION 68#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu 69#endif 70 71 72/* include system headers */ 73#if U_PLATFORM_USES_ONLY_WIN32_API 74 /* 75 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. 76 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) 77 * to use native APIs as much as possible? 78 */ 79# define WIN32_LEAN_AND_MEAN 80# define VC_EXTRALEAN 81# define NOUSER 82# define NOSERVICE 83# define NOIME 84# define NOMCX 85# include <windows.h> 86# include "wintz.h" 87#elif U_PLATFORM == U_PF_OS400 88# include <float.h> 89# include <qusec.h> /* error code structure */ 90# include <qusrjobi.h> 91# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 92# include <mih/testptr.h> /* For uprv_maximumPtr */ 93#elif U_PLATFORM == U_PF_OS390 94# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 95#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS 96# include <limits.h> 97# include <unistd.h> 98# if U_PLATFORM == U_PF_SOLARIS 99# ifndef _XPG4_2 100# define _XPG4_2 101# endif 102# endif 103#elif U_PLATFORM == U_PF_QNX 104# include <sys/neutrino.h> 105#endif 106 107#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__) 108/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */ 109#undef __STRICT_ANSI__ 110#endif 111 112/* 113 * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. 114 */ 115#include <time.h> 116 117#if !U_PLATFORM_USES_ONLY_WIN32_API 118#include <sys/time.h> 119#endif 120 121/* 122 * Only include langinfo.h if we have a way to get the codeset. If we later 123 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 124 * 125 */ 126 127#if U_HAVE_NL_LANGINFO_CODESET 128#include <langinfo.h> 129#endif 130 131/** 132 * Simple things (presence of functions, etc) should just go in configure.in and be added to 133 * icucfg.h via autoheader. 134 */ 135#if U_PLATFORM_IMPLEMENTS_POSIX 136# if U_PLATFORM == U_PF_OS400 137# define HAVE_DLFCN_H 0 138# define HAVE_DLOPEN 0 139# else 140# ifndef HAVE_DLFCN_H 141# define HAVE_DLFCN_H 1 142# endif 143# ifndef HAVE_DLOPEN 144# define HAVE_DLOPEN 1 145# endif 146# endif 147# ifndef HAVE_GETTIMEOFDAY 148# define HAVE_GETTIMEOFDAY 1 149# endif 150#else 151# define HAVE_DLFCN_H 0 152# define HAVE_DLOPEN 0 153# define HAVE_GETTIMEOFDAY 0 154#endif 155 156U_NAMESPACE_USE 157 158/* Define the extension for data files, again... */ 159#define DATA_TYPE "dat" 160 161/* Leave this copyright notice here! */ 162static const char copyright[] = U_COPYRIGHT_STRING; 163 164/* floating point implementations ------------------------------------------- */ 165 166/* We return QNAN rather than SNAN*/ 167#define SIGN 0x80000000U 168 169/* Make it easy to define certain types of constants */ 170typedef union { 171 int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ 172 double d64; 173} BitPatternConversion; 174static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 175static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 176 177/*--------------------------------------------------------------------------- 178 Platform utilities 179 Our general strategy is to assume we're on a POSIX platform. Platforms which 180 are non-POSIX must declare themselves so. The default POSIX implementation 181 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 182 functions). 183 ---------------------------------------------------------------------------*/ 184 185#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 186# undef U_POSIX_LOCALE 187#else 188# define U_POSIX_LOCALE 1 189#endif 190 191/* 192 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 193 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 194*/ 195#if !IEEE_754 196static char* 197u_topNBytesOfDouble(double* d, int n) 198{ 199#if U_IS_BIG_ENDIAN 200 return (char*)d; 201#else 202 return (char*)(d + 1) - n; 203#endif 204} 205 206static char* 207u_bottomNBytesOfDouble(double* d, int n) 208{ 209#if U_IS_BIG_ENDIAN 210 return (char*)(d + 1) - n; 211#else 212 return (char*)d; 213#endif 214} 215#endif /* !IEEE_754 */ 216 217#if IEEE_754 218static UBool 219u_signBit(double d) { 220 uint8_t hiByte; 221#if U_IS_BIG_ENDIAN 222 hiByte = *(uint8_t *)&d; 223#else 224 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 225#endif 226 return (hiByte & 0x80) != 0; 227} 228#endif 229 230 231 232#if defined (U_DEBUG_FAKETIME) 233/* Override the clock to test things without having to move the system clock. 234 * Assumes POSIX gettimeofday() will function 235 */ 236UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 237UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 238UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 239static UMutex fakeClockMutex = U_MUTEX_INTIALIZER; 240 241static UDate getUTCtime_real() { 242 struct timeval posixTime; 243 gettimeofday(&posixTime, NULL); 244 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 245} 246 247static UDate getUTCtime_fake() { 248 umtx_lock(&fakeClockMutex); 249 if(!fakeClock_set) { 250 UDate real = getUTCtime_real(); 251 const char *fake_start = getenv("U_FAKETIME_START"); 252 if((fake_start!=NULL) && (fake_start[0]!=0)) { 253 sscanf(fake_start,"%lf",&fakeClock_t0); 254 fakeClock_dt = fakeClock_t0 - real; 255 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 256 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 257 fakeClock_t0, fake_start, fakeClock_dt, real); 258 } else { 259 fakeClock_dt = 0; 260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 261 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 262 } 263 fakeClock_set = TRUE; 264 } 265 umtx_unlock(&fakeClockMutex); 266 267 return getUTCtime_real() + fakeClock_dt; 268} 269#endif 270 271#if U_PLATFORM_USES_ONLY_WIN32_API 272typedef union { 273 int64_t int64; 274 FILETIME fileTime; 275} FileTimeConversion; /* This is like a ULARGE_INTEGER */ 276 277/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 278#define EPOCH_BIAS INT64_C(116444736000000000) 279#define HECTONANOSECOND_PER_MILLISECOND 10000 280 281#endif 282 283/*--------------------------------------------------------------------------- 284 Universal Implementations 285 These are designed to work on all platforms. Try these, and if they 286 don't work on your platform, then special case your platform with new 287 implementations. 288---------------------------------------------------------------------------*/ 289 290U_CAPI UDate U_EXPORT2 291uprv_getUTCtime() 292{ 293#if defined(U_DEBUG_FAKETIME) 294 return getUTCtime_fake(); /* Hook for overriding the clock */ 295#else 296 return uprv_getRawUTCtime(); 297#endif 298} 299 300/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 301U_CAPI UDate U_EXPORT2 302uprv_getRawUTCtime() 303{ 304#if U_PLATFORM_USES_ONLY_WIN32_API 305 306 FileTimeConversion winTime; 307 GetSystemTimeAsFileTime(&winTime.fileTime); 308 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 309#else 310 311#if HAVE_GETTIMEOFDAY 312 struct timeval posixTime; 313 gettimeofday(&posixTime, NULL); 314 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 315#else 316 time_t epochtime; 317 time(&epochtime); 318 return (UDate)epochtime * U_MILLIS_PER_SECOND; 319#endif 320 321#endif 322} 323 324/*----------------------------------------------------------------------------- 325 IEEE 754 326 These methods detect and return NaN and infinity values for doubles 327 conforming to IEEE 754. Platforms which support this standard include X86, 328 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 329 If this doesn't work on your platform, you have non-IEEE floating-point, and 330 will need to code your own versions. A naive implementation is to return 0.0 331 for getNaN and getInfinity, and false for isNaN and isInfinite. 332 ---------------------------------------------------------------------------*/ 333 334U_CAPI UBool U_EXPORT2 335uprv_isNaN(double number) 336{ 337#if IEEE_754 338 BitPatternConversion convertedNumber; 339 convertedNumber.d64 = number; 340 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 341 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 342 343#elif U_PLATFORM == U_PF_OS390 344 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 345 sizeof(uint32_t)); 346 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 347 sizeof(uint32_t)); 348 349 return ((highBits & 0x7F080000L) == 0x7F080000L) && 350 (lowBits == 0x00000000L); 351 352#else 353 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 354 /* you'll need to replace this default implementation with what's correct*/ 355 /* for your platform.*/ 356 return number != number; 357#endif 358} 359 360U_CAPI UBool U_EXPORT2 361uprv_isInfinite(double number) 362{ 363#if IEEE_754 364 BitPatternConversion convertedNumber; 365 convertedNumber.d64 = number; 366 /* Infinity is exactly 0x7FF0000000000000U. */ 367 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 368#elif U_PLATFORM == U_PF_OS390 369 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 370 sizeof(uint32_t)); 371 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 372 sizeof(uint32_t)); 373 374 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 375 376#else 377 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 378 /* value, you'll need to replace this default implementation with what's*/ 379 /* correct for your platform.*/ 380 return number == (2.0 * number); 381#endif 382} 383 384U_CAPI UBool U_EXPORT2 385uprv_isPositiveInfinity(double number) 386{ 387#if IEEE_754 || U_PLATFORM == U_PF_OS390 388 return (UBool)(number > 0 && uprv_isInfinite(number)); 389#else 390 return uprv_isInfinite(number); 391#endif 392} 393 394U_CAPI UBool U_EXPORT2 395uprv_isNegativeInfinity(double number) 396{ 397#if IEEE_754 || U_PLATFORM == U_PF_OS390 398 return (UBool)(number < 0 && uprv_isInfinite(number)); 399 400#else 401 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 402 sizeof(uint32_t)); 403 return((highBits & SIGN) && uprv_isInfinite(number)); 404 405#endif 406} 407 408U_CAPI double U_EXPORT2 409uprv_getNaN() 410{ 411#if IEEE_754 || U_PLATFORM == U_PF_OS390 412 return gNan.d64; 413#else 414 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 415 /* you'll need to replace this default implementation with what's correct*/ 416 /* for your platform.*/ 417 return 0.0; 418#endif 419} 420 421U_CAPI double U_EXPORT2 422uprv_getInfinity() 423{ 424#if IEEE_754 || U_PLATFORM == U_PF_OS390 425 return gInf.d64; 426#else 427 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 428 /* value, you'll need to replace this default implementation with what's*/ 429 /* correct for your platform.*/ 430 return 0.0; 431#endif 432} 433 434U_CAPI double U_EXPORT2 435uprv_floor(double x) 436{ 437 return floor(x); 438} 439 440U_CAPI double U_EXPORT2 441uprv_ceil(double x) 442{ 443 return ceil(x); 444} 445 446U_CAPI double U_EXPORT2 447uprv_round(double x) 448{ 449 return uprv_floor(x + 0.5); 450} 451 452U_CAPI double U_EXPORT2 453uprv_fabs(double x) 454{ 455 return fabs(x); 456} 457 458U_CAPI double U_EXPORT2 459uprv_modf(double x, double* y) 460{ 461 return modf(x, y); 462} 463 464U_CAPI double U_EXPORT2 465uprv_fmod(double x, double y) 466{ 467 return fmod(x, y); 468} 469 470U_CAPI double U_EXPORT2 471uprv_pow(double x, double y) 472{ 473 /* This is declared as "double pow(double x, double y)" */ 474 return pow(x, y); 475} 476 477U_CAPI double U_EXPORT2 478uprv_pow10(int32_t x) 479{ 480 return pow(10.0, (double)x); 481} 482 483U_CAPI double U_EXPORT2 484uprv_fmax(double x, double y) 485{ 486#if IEEE_754 487 /* first handle NaN*/ 488 if(uprv_isNaN(x) || uprv_isNaN(y)) 489 return uprv_getNaN(); 490 491 /* check for -0 and 0*/ 492 if(x == 0.0 && y == 0.0 && u_signBit(x)) 493 return y; 494 495#endif 496 497 /* this should work for all flt point w/o NaN and Inf special cases */ 498 return (x > y ? x : y); 499} 500 501U_CAPI double U_EXPORT2 502uprv_fmin(double x, double y) 503{ 504#if IEEE_754 505 /* first handle NaN*/ 506 if(uprv_isNaN(x) || uprv_isNaN(y)) 507 return uprv_getNaN(); 508 509 /* check for -0 and 0*/ 510 if(x == 0.0 && y == 0.0 && u_signBit(y)) 511 return y; 512 513#endif 514 515 /* this should work for all flt point w/o NaN and Inf special cases */ 516 return (x > y ? y : x); 517} 518 519/** 520 * Truncates the given double. 521 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 522 * This is different than calling floor() or ceil(): 523 * floor(3.3) = 3, floor(-3.3) = -4 524 * ceil(3.3) = 4, ceil(-3.3) = -3 525 */ 526U_CAPI double U_EXPORT2 527uprv_trunc(double d) 528{ 529#if IEEE_754 530 /* handle error cases*/ 531 if(uprv_isNaN(d)) 532 return uprv_getNaN(); 533 if(uprv_isInfinite(d)) 534 return uprv_getInfinity(); 535 536 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ 537 return ceil(d); 538 else 539 return floor(d); 540 541#else 542 return d >= 0 ? floor(d) : ceil(d); 543 544#endif 545} 546 547/** 548 * Return the largest positive number that can be represented by an integer 549 * type of arbitrary bit length. 550 */ 551U_CAPI double U_EXPORT2 552uprv_maxMantissa(void) 553{ 554 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 555} 556 557U_CAPI double U_EXPORT2 558uprv_log(double d) 559{ 560 return log(d); 561} 562 563U_CAPI void * U_EXPORT2 564uprv_maximumPtr(void * base) 565{ 566#if U_PLATFORM == U_PF_OS400 567 /* 568 * With the provided function we should never be out of range of a given segment 569 * (a traditional/typical segment that is). Our segments have 5 bytes for the 570 * id and 3 bytes for the offset. The key is that the casting takes care of 571 * only retrieving the offset portion minus x1000. Hence, the smallest offset 572 * seen in a program is x001000 and when casted to an int would be 0. 573 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 574 * 575 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 576 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 577 * This function determines the activation based on the pointer that is passed in and 578 * calculates the appropriate maximum available size for 579 * each pointer type (TERASPACE and non-TERASPACE) 580 * 581 * Unlike other operating systems, the pointer model isn't determined at 582 * compile time on i5/OS. 583 */ 584 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 585 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 586 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 587 } 588 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 589 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 590 591#else 592 return U_MAX_PTR(base); 593#endif 594} 595 596/*--------------------------------------------------------------------------- 597 Platform-specific Implementations 598 Try these, and if they don't work on your platform, then special case your 599 platform with new implementations. 600 ---------------------------------------------------------------------------*/ 601 602/* Generic time zone layer -------------------------------------------------- */ 603 604/* Time zone utilities */ 605U_CAPI void U_EXPORT2 606uprv_tzset() 607{ 608#if defined(U_TZSET) 609 U_TZSET(); 610#else 611 /* no initialization*/ 612#endif 613} 614 615U_CAPI int32_t U_EXPORT2 616uprv_timezone() 617{ 618#ifdef U_TIMEZONE 619 return U_TIMEZONE; 620#else 621 time_t t, t1, t2; 622 struct tm tmrec; 623 int32_t tdiff = 0; 624 625 time(&t); 626 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 627#if U_PLATFORM != U_PF_IPHONE 628 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 629#endif 630 t1 = mktime(&tmrec); /* local time in seconds*/ 631 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 632 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 633 tdiff = t2 - t1; 634 635#if U_PLATFORM != U_PF_IPHONE 636 /* imitate NT behaviour, which returns same timezone offset to GMT for 637 winter and summer. 638 This does not work on all platforms. For instance, on glibc on Linux 639 and on Mac OS 10.5, tdiff calculated above remains the same 640 regardless of whether DST is in effect or not. iOS is another 641 platform where this does not work. Linux + glibc and Mac OS 10.5 642 have U_TIMEZONE defined so that this code is not reached. 643 */ 644 if (dst_checked) 645 tdiff += 3600; 646#endif 647 return tdiff; 648#endif 649} 650 651/* Note that U_TZNAME does *not* have to be tzname, but if it is, 652 some platforms need to have it declared here. */ 653 654#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API)) 655/* RS6000 and others reject char **tzname. */ 656extern U_IMPORT char *U_TZNAME[]; 657#endif 658 659#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) 660/* These platforms are likely to use Olson timezone IDs. */ 661#define CHECK_LOCALTIME_LINK 1 662#if U_PLATFORM_IS_DARWIN_BASED 663#include <tzfile.h> 664#define TZZONEINFO (TZDIR "/") 665#elif U_PLATFORM == U_PF_SOLARIS 666#define TZDEFAULT "/etc/localtime" 667#define TZZONEINFO "/usr/share/lib/zoneinfo/" 668#define TZZONEINFO2 "../usr/share/lib/zoneinfo/" 669#define TZ_ENV_CHECK "localtime" 670#else 671#define TZDEFAULT "/etc/localtime" 672#define TZZONEINFO "/usr/share/zoneinfo/" 673#endif 674#if U_HAVE_DIRENT_H 675#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 676/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 677 symlinked to /etc/localtime, which makes searchForTZFile return 678 'localtime' when it's the first match. */ 679#define TZFILE_SKIP2 "localtime" 680#define SEARCH_TZFILE 681#include <dirent.h> /* Needed to search through system timezone files */ 682#endif 683static char gTimeZoneBuffer[PATH_MAX]; 684static char *gTimeZoneBufferPtr = NULL; 685#endif 686 687#if !U_PLATFORM_USES_ONLY_WIN32_API 688#define isNonDigit(ch) (ch < '0' || '9' < ch) 689static UBool isValidOlsonID(const char *id) { 690 int32_t idx = 0; 691 692 /* Determine if this is something like Iceland (Olson ID) 693 or AST4ADT (non-Olson ID) */ 694 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 695 idx++; 696 } 697 698 /* If we went through the whole string, then it might be okay. 699 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 700 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 701 The rest of the time it could be an Olson ID. George */ 702 return (UBool)(id[idx] == 0 703 || uprv_strcmp(id, "PST8PDT") == 0 704 || uprv_strcmp(id, "MST7MDT") == 0 705 || uprv_strcmp(id, "CST6CDT") == 0 706 || uprv_strcmp(id, "EST5EDT") == 0); 707} 708 709/* On some Unix-like OS, 'posix' subdirectory in 710 /usr/share/zoneinfo replicates the top-level contents. 'right' 711 subdirectory has the same set of files, but individual files 712 are different from those in the top-level directory or 'posix' 713 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 714 has files for UTC. 715 When the first match for /etc/localtime is in either of them 716 (usually in posix because 'right' has different file contents), 717 or TZ environment variable points to one of them, createTimeZone 718 fails because, say, 'posix/America/New_York' is not an Olson 719 timezone id ('America/New_York' is). So, we have to skip 720 'posix/' and 'right/' at the beginning. */ 721static void skipZoneIDPrefix(const char** id) { 722 if (uprv_strncmp(*id, "posix/", 6) == 0 723 || uprv_strncmp(*id, "right/", 6) == 0) 724 { 725 *id += 6; 726 } 727} 728#endif 729 730#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API 731 732#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 733typedef struct OffsetZoneMapping { 734 int32_t offsetSeconds; 735 int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ 736 const char *stdID; 737 const char *dstID; 738 const char *olsonID; 739} OffsetZoneMapping; 740 741enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; 742 743/* 744This list tries to disambiguate a set of abbreviated timezone IDs and offsets 745and maps it to an Olson ID. 746Before adding anything to this list, take a look at 747icu/source/tools/tzcode/tz.alias 748Sometimes no daylight savings (0) is important to define due to aliases. 749This list can be tested with icu/source/test/compat/tzone.pl 750More values could be added to daylightType to increase precision. 751*/ 752static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 753 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 754 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 755 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 756 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 757 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 758 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 759 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 760 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 761 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 762 {-34200, 2, "CST", "CST", "Australia/South"}, 763 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 764 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 765 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 766 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 767 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 768 {-28800, 2, "WST", "WST", "Australia/West"}, 769 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 770 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 771 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 772 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 773 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 774 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 775 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 776 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 777 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 778 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 779 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 780 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 781 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 782 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 783 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 784 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 785 {0, 1, "GMT", "IST", "Europe/Dublin"}, 786 {0, 1, "GMT", "BST", "Europe/London"}, 787 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 788 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 789 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 790 {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, 791 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 792 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 793 {10800, 1, "WGT", "WGST", "America/Godthab"}, 794 {10800, 2, "BRT", "BRST", "Brazil/East"}, 795 {12600, 1, "NST", "NDT", "America/St_Johns"}, 796 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 797 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 798 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 799 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 800 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 801 {18000, 1, "CST", "CDT", "America/Havana"}, 802 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 803 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 804 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 805 {21600, 0, "CST", "CDT", "America/Guatemala"}, 806 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 807 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 808 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 809 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 810 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 811 {36000, 1, "HAST", "HADT", "US/Aleutian"} 812}; 813 814/*#define DEBUG_TZNAME*/ 815 816static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 817{ 818 int32_t idx; 819#ifdef DEBUG_TZNAME 820 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 821#endif 822 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) 823 { 824 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 825 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 826 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 827 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 828 { 829 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 830 } 831 } 832 return NULL; 833} 834#endif 835 836#ifdef SEARCH_TZFILE 837#define MAX_READ_SIZE 512 838 839typedef struct DefaultTZInfo { 840 char* defaultTZBuffer; 841 int64_t defaultTZFileSize; 842 FILE* defaultTZFilePtr; 843 UBool defaultTZstatus; 844 int32_t defaultTZPosition; 845} DefaultTZInfo; 846 847/* 848 * This method compares the two files given to see if they are a match. 849 * It is currently use to compare two TZ files. 850 */ 851static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 852 FILE* file; 853 int64_t sizeFile; 854 int64_t sizeFileLeft; 855 int32_t sizeFileRead; 856 int32_t sizeFileToRead; 857 char bufferFile[MAX_READ_SIZE]; 858 UBool result = TRUE; 859 860 if (tzInfo->defaultTZFilePtr == NULL) { 861 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 862 } 863 file = fopen(TZFileName, "r"); 864 865 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 866 867 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 868 /* First check that the file size are equal. */ 869 if (tzInfo->defaultTZFileSize == 0) { 870 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 871 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 872 } 873 fseek(file, 0, SEEK_END); 874 sizeFile = ftell(file); 875 sizeFileLeft = sizeFile; 876 877 if (sizeFile != tzInfo->defaultTZFileSize) { 878 result = FALSE; 879 } else { 880 /* Store the data from the files in seperate buffers and 881 * compare each byte to determine equality. 882 */ 883 if (tzInfo->defaultTZBuffer == NULL) { 884 rewind(tzInfo->defaultTZFilePtr); 885 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 886 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 887 } 888 rewind(file); 889 while(sizeFileLeft > 0) { 890 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 891 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; 892 893 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 894 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 895 result = FALSE; 896 break; 897 } 898 sizeFileLeft -= sizeFileRead; 899 tzInfo->defaultTZPosition += sizeFileRead; 900 } 901 } 902 } else { 903 result = FALSE; 904 } 905 906 if (file != NULL) { 907 fclose(file); 908 } 909 910 return result; 911} 912 913 914/* dirent also lists two entries: "." and ".." that we can safely ignore. */ 915#define SKIP1 "." 916#define SKIP2 ".." 917static UBool U_CALLCONV putil_cleanup(void); 918static CharString *gSearchTZFileResult = NULL; 919 920/* 921 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 922 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. 923 */ 924static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 925 DIR* dirp = opendir(path); 926 DIR* subDirp = NULL; 927 struct dirent* dirEntry = NULL; 928 929 char* result = NULL; 930 if (dirp == NULL) { 931 return result; 932 } 933 934 if (gSearchTZFileResult == NULL) { 935 gSearchTZFileResult = new CharString; 936 if (gSearchTZFileResult == NULL) { 937 return NULL; 938 } 939 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 940 } 941 942 /* Save the current path */ 943 UErrorCode status = U_ZERO_ERROR; 944 CharString curpath(path, -1, status); 945 if (U_FAILURE(status)) { 946 return NULL; 947 } 948 949 /* Check each entry in the directory. */ 950 while((dirEntry = readdir(dirp)) != NULL) { 951 const char* dirName = dirEntry->d_name; 952 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 953 /* Create a newpath with the new entry to test each entry in the directory. */ 954 CharString newpath(curpath, status); 955 newpath.append(dirName, -1, status); 956 if (U_FAILURE(status)) { 957 return NULL; 958 } 959 960 if ((subDirp = opendir(newpath.data())) != NULL) { 961 /* If this new path is a directory, make a recursive call with the newpath. */ 962 closedir(subDirp); 963 newpath.append('/', status); 964 if (U_FAILURE(status)) { 965 return NULL; 966 } 967 result = searchForTZFile(newpath.data(), tzInfo); 968 /* 969 Have to get out here. Otherwise, we'd keep looking 970 and return the first match in the top-level directory 971 if there's a match in the top-level. If not, this function 972 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 973 It worked without this in most cases because we have a fallback of calling 974 localtime_r to figure out the default timezone. 975 */ 976 if (result != NULL) 977 break; 978 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 979 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { 980 int32_t amountToSkip = sizeof(TZZONEINFO) - 1; 981 if (amountToSkip > newpath.length()) { 982 amountToSkip = newpath.length(); 983 } 984 const char* zoneid = newpath.data() + amountToSkip; 985 skipZoneIDPrefix(&zoneid); 986 gSearchTZFileResult->clear(); 987 gSearchTZFileResult->append(zoneid, -1, status); 988 if (U_FAILURE(status)) { 989 return NULL; 990 } 991 result = gSearchTZFileResult->data(); 992 /* Get out after the first one found. */ 993 break; 994 } 995 } 996 } 997 } 998 closedir(dirp); 999 return result; 1000} 1001#endif 1002 1003U_CAPI void U_EXPORT2 1004uprv_tzname_clear_cache() 1005{ 1006#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1007 gTimeZoneBufferPtr = NULL; 1008#endif 1009} 1010 1011U_CAPI const char* U_EXPORT2 1012uprv_tzname(int n) 1013{ 1014 const char *tzid = NULL; 1015#if U_PLATFORM_USES_ONLY_WIN32_API 1016 tzid = uprv_detectWindowsTimeZone(); 1017 1018 if (tzid != NULL) { 1019 return tzid; 1020 } 1021#else 1022 1023/*#if U_PLATFORM_IS_DARWIN_BASED 1024 int ret; 1025 1026 tzid = getenv("TZFILE"); 1027 if (tzid != NULL) { 1028 return tzid; 1029 } 1030#endif*/ 1031 1032/* This code can be temporarily disabled to test tzname resolution later on. */ 1033#ifndef DEBUG_TZNAME 1034 tzid = getenv("TZ"); 1035 if (tzid != NULL && isValidOlsonID(tzid) 1036#if U_PLATFORM == U_PF_SOLARIS 1037 /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ 1038 && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 1039#endif 1040 ) { 1041 /* The colon forces tzset() to treat the remainder as zoneinfo path */ 1042 if (tzid[0] == ':') { 1043 tzid++; 1044 } 1045 /* This might be a good Olson ID. */ 1046 skipZoneIDPrefix(&tzid); 1047 return tzid; 1048 } 1049 /* else U_TZNAME will give a better result. */ 1050#endif 1051 1052#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) 1053 /* Caller must handle threading issues */ 1054 if (gTimeZoneBufferPtr == NULL) { 1055 /* 1056 This is a trick to look at the name of the link to get the Olson ID 1057 because the tzfile contents is underspecified. 1058 This isn't guaranteed to work because it may not be a symlink. 1059 */ 1060 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); 1061 if (0 < ret) { 1062 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1063 gTimeZoneBuffer[ret] = 0; 1064 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1065 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1066 { 1067 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1068 } 1069#if U_PLATFORM == U_PF_SOLARIS 1070 else 1071 { 1072 tzZoneInfoLen = uprv_strlen(TZZONEINFO2); 1073 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0 1074 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1075 { 1076 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1077 } 1078 } 1079#endif 1080 } else { 1081#if defined(SEARCH_TZFILE) 1082 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1083 if (tzInfo != NULL) { 1084 tzInfo->defaultTZBuffer = NULL; 1085 tzInfo->defaultTZFileSize = 0; 1086 tzInfo->defaultTZFilePtr = NULL; 1087 tzInfo->defaultTZstatus = FALSE; 1088 tzInfo->defaultTZPosition = 0; 1089 1090 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1091 1092 /* Free previously allocated memory */ 1093 if (tzInfo->defaultTZBuffer != NULL) { 1094 uprv_free(tzInfo->defaultTZBuffer); 1095 } 1096 if (tzInfo->defaultTZFilePtr != NULL) { 1097 fclose(tzInfo->defaultTZFilePtr); 1098 } 1099 uprv_free(tzInfo); 1100 } 1101 1102 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1103 return gTimeZoneBufferPtr; 1104 } 1105#endif 1106 } 1107 } 1108 else { 1109 return gTimeZoneBufferPtr; 1110 } 1111#endif 1112#endif 1113 1114#ifdef U_TZNAME 1115#if U_PLATFORM_USES_ONLY_WIN32_API 1116 /* The return value is free'd in timezone.cpp on Windows because 1117 * the other code path returns a pointer to a heap location. */ 1118 return uprv_strdup(U_TZNAME[n]); 1119#else 1120 /* 1121 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1122 So we remap the abbreviation to an olson ID. 1123 1124 Since Windows exposes a little more timezone information, 1125 we normally don't use this code on Windows because 1126 uprv_detectWindowsTimeZone should have already given the correct answer. 1127 */ 1128 { 1129 struct tm juneSol, decemberSol; 1130 int daylightType; 1131 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1132 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1133 1134 /* This probing will tell us when daylight savings occurs. */ 1135 localtime_r(&juneSolstice, &juneSol); 1136 localtime_r(&decemberSolstice, &decemberSol); 1137 if(decemberSol.tm_isdst > 0) { 1138 daylightType = U_DAYLIGHT_DECEMBER; 1139 } else if(juneSol.tm_isdst > 0) { 1140 daylightType = U_DAYLIGHT_JUNE; 1141 } else { 1142 daylightType = U_DAYLIGHT_NONE; 1143 } 1144 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1145 if (tzid != NULL) { 1146 return tzid; 1147 } 1148 } 1149 return U_TZNAME[n]; 1150#endif 1151#else 1152 return ""; 1153#endif 1154} 1155 1156/* Get and set the ICU data directory --------------------------------------- */ 1157 1158static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; 1159static char *gDataDirectory = NULL; 1160 1161UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; 1162static CharString *gTimeZoneFilesDirectory = NULL; 1163 1164#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1165 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1166#endif 1167 1168static UBool U_CALLCONV putil_cleanup(void) 1169{ 1170 if (gDataDirectory && *gDataDirectory) { 1171 uprv_free(gDataDirectory); 1172 } 1173 gDataDirectory = NULL; 1174 gDataDirInitOnce.reset(); 1175 1176 delete gTimeZoneFilesDirectory; 1177 gTimeZoneFilesDirectory = NULL; 1178 gTimeZoneFilesInitOnce.reset(); 1179 1180#ifdef SEARCH_TZFILE 1181 delete gSearchTZFileResult; 1182 gSearchTZFileResult = NULL; 1183#endif 1184 1185#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API 1186 if (gCorrectedPOSIXLocale) { 1187 uprv_free(gCorrectedPOSIXLocale); 1188 gCorrectedPOSIXLocale = NULL; 1189 } 1190#endif 1191 return TRUE; 1192} 1193 1194/* 1195 * Set the data directory. 1196 * Make a copy of the passed string, and set the global data dir to point to it. 1197 */ 1198U_CAPI void U_EXPORT2 1199u_setDataDirectory(const char *directory) { 1200 char *newDataDir; 1201 int32_t length; 1202 1203 if(directory==NULL || *directory==0) { 1204 /* A small optimization to prevent the malloc and copy when the 1205 shared library is used, and this is a way to make sure that NULL 1206 is never returned. 1207 */ 1208 newDataDir = (char *)""; 1209 } 1210 else { 1211 length=(int32_t)uprv_strlen(directory); 1212 newDataDir = (char *)uprv_malloc(length + 2); 1213 /* Exit out if newDataDir could not be created. */ 1214 if (newDataDir == NULL) { 1215 return; 1216 } 1217 uprv_strcpy(newDataDir, directory); 1218 1219#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1220 { 1221 char *p; 1222 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1223 *p = U_FILE_SEP_CHAR; 1224 } 1225 } 1226#endif 1227 } 1228 1229 if (gDataDirectory && *gDataDirectory) { 1230 uprv_free(gDataDirectory); 1231 } 1232 gDataDirectory = newDataDir; 1233 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1234} 1235 1236U_CAPI UBool U_EXPORT2 1237uprv_pathIsAbsolute(const char *path) 1238{ 1239 if(!path || !*path) { 1240 return FALSE; 1241 } 1242 1243 if(*path == U_FILE_SEP_CHAR) { 1244 return TRUE; 1245 } 1246 1247#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1248 if(*path == U_FILE_ALT_SEP_CHAR) { 1249 return TRUE; 1250 } 1251#endif 1252 1253#if U_PLATFORM_USES_ONLY_WIN32_API 1254 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1255 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1256 path[1] == ':' ) { 1257 return TRUE; 1258 } 1259#endif 1260 1261 return FALSE; 1262} 1263 1264/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1265 until some client wrapper makefiles are updated */ 1266#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR 1267# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1268# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1269# endif 1270#endif 1271 1272static void U_CALLCONV dataDirectoryInitFn() { 1273 /* If we already have the directory, then return immediately. Will happen if user called 1274 * u_setDataDirectory(). 1275 */ 1276 if (gDataDirectory) { 1277 return; 1278 } 1279 1280 const char *path = NULL; 1281#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1282 char datadir_path_buffer[PATH_MAX]; 1283#endif 1284 1285 /* 1286 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1287 override ICU's data with the ICU_DATA environment variable. This prevents 1288 problems where multiple custom copies of ICU's specific version of data 1289 are installed on a system. Either the application must define the data 1290 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1291 ICU, set the data with udata_setCommonData or trust that all of the 1292 required data is contained in ICU's data library that contains 1293 the entry point defined by U_ICUDATA_ENTRY_POINT. 1294 1295 There may also be some platforms where environment variables 1296 are not allowed. 1297 */ 1298# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1299 /* First try to get the environment variable */ 1300 path=getenv("ICU_DATA"); 1301# endif 1302 1303 /* ICU_DATA_DIR may be set as a compile option. 1304 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1305 * and is used only when data is built in archive mode eliminating the need 1306 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1307 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1308 * set their own path. 1309 */ 1310#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1311 if(path==NULL || *path==0) { 1312# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1313 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1314# endif 1315# ifdef ICU_DATA_DIR 1316 path=ICU_DATA_DIR; 1317# else 1318 path=U_ICU_DATA_DEFAULT_DIR; 1319# endif 1320# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1321 if (prefix != NULL) { 1322 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1323 path=datadir_path_buffer; 1324 } 1325# endif 1326 } 1327#endif 1328 1329 if(path==NULL) { 1330 /* It looks really bad, set it to something. */ 1331 path = ""; 1332 } 1333 1334 u_setDataDirectory(path); 1335 return; 1336} 1337 1338U_CAPI const char * U_EXPORT2 1339u_getDataDirectory(void) { 1340 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); 1341 return gDataDirectory; 1342} 1343 1344static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { 1345 if (U_FAILURE(status)) { 1346 return; 1347 } 1348 gTimeZoneFilesDirectory->clear(); 1349 gTimeZoneFilesDirectory->append(path, status); 1350#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1351 char *p = gTimeZoneFilesDirectory->data(); 1352 while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) { 1353 *p = U_FILE_SEP_CHAR; 1354 } 1355#endif 1356} 1357 1358#define TO_STRING(x) TO_STRING_2(x) 1359#define TO_STRING_2(x) #x 1360 1361static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { 1362 U_ASSERT(gTimeZoneFilesDirectory == NULL); 1363 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1364 gTimeZoneFilesDirectory = new CharString(); 1365 if (gTimeZoneFilesDirectory == NULL) { 1366 status = U_MEMORY_ALLOCATION_ERROR; 1367 return; 1368 } 1369 const char *dir = getenv("ICU_TIMEZONE_FILES_DIR"); 1370#if defined(U_TIMEZONE_FILES_DIR) 1371 if (dir == NULL) { 1372 dir = TO_STRING(U_TIMEZONE_FILES_DIR); 1373 } 1374#endif 1375 if (dir == NULL) { 1376 dir = ""; 1377 } 1378 setTimeZoneFilesDir(dir, status); 1379} 1380 1381 1382U_CAPI const char * U_EXPORT2 1383u_getTimeZoneFilesDirectory(UErrorCode *status) { 1384 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); 1385 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : ""; 1386} 1387 1388U_CAPI void U_EXPORT2 1389u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { 1390 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); 1391 setTimeZoneFilesDir(path, *status); 1392 1393 // Note: this function does some extra churn, first setting based on the 1394 // environment, then immediately replacing with the value passed in. 1395 // The logic is simpler that way, and performance shouldn't be an issue. 1396} 1397 1398 1399#if U_POSIX_LOCALE 1400/* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1401 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1402 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1403 */ 1404static const char *uprv_getPOSIXIDForCategory(int category) 1405{ 1406 const char* posixID = NULL; 1407 if (category == LC_MESSAGES || category == LC_CTYPE) { 1408 /* 1409 * On Solaris two different calls to setlocale can result in 1410 * different values. Only get this value once. 1411 * 1412 * We must check this first because an application can set this. 1413 * 1414 * LC_ALL can't be used because it's platform dependent. The LANG 1415 * environment variable seems to affect LC_CTYPE variable by default. 1416 * Here is what setlocale(LC_ALL, NULL) can return. 1417 * HPUX can return 'C C C C C C C' 1418 * Solaris can return /en_US/C/C/C/C/C on the second try. 1419 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 1420 * 1421 * The default codepage detection also needs to use LC_CTYPE. 1422 * 1423 * Do not call setlocale(LC_*, "")! Using an empty string instead 1424 * of NULL, will modify the libc behavior. 1425 */ 1426 posixID = setlocale(category, NULL); 1427 if ((posixID == 0) 1428 || (uprv_strcmp("C", posixID) == 0) 1429 || (uprv_strcmp("POSIX", posixID) == 0)) 1430 { 1431 /* Maybe we got some garbage. Try something more reasonable */ 1432 posixID = getenv("LC_ALL"); 1433 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 1434 * This is needed to properly handle empty env. variables 1435 */ 1436#if U_PLATFORM == U_PF_SOLARIS 1437 if ((posixID == 0) || (posixID[0] == '\0')) { 1438 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1439 if ((posixID == 0) || (posixID[0] == '\0')) { 1440#else 1441 if (posixID == 0) { 1442 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1443 if (posixID == 0) { 1444#endif 1445 posixID = getenv("LANG"); 1446 } 1447 } 1448 } 1449 } 1450 if ((posixID==0) 1451 || (uprv_strcmp("C", posixID) == 0) 1452 || (uprv_strcmp("POSIX", posixID) == 0)) 1453 { 1454 /* Nothing worked. Give it a nice POSIX default value. */ 1455 posixID = "en_US_POSIX"; 1456 } 1457 return posixID; 1458} 1459 1460/* Return just the POSIX id for the default locale, whatever happens to be in 1461 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1462 */ 1463static const char *uprv_getPOSIXIDForDefaultLocale(void) 1464{ 1465 static const char* posixID = NULL; 1466 if (posixID == 0) { 1467 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1468 } 1469 return posixID; 1470} 1471 1472#if !U_CHARSET_IS_UTF8 1473/* Return just the POSIX id for the default codepage, whatever happens to be in 1474 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1475 */ 1476static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1477{ 1478 static const char* posixID = NULL; 1479 if (posixID == 0) { 1480 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1481 } 1482 return posixID; 1483} 1484#endif 1485#endif 1486 1487/* NOTE: The caller should handle thread safety */ 1488U_CAPI const char* U_EXPORT2 1489uprv_getDefaultLocaleID() 1490{ 1491#if U_POSIX_LOCALE 1492/* 1493 Note that: (a '!' means the ID is improper somehow) 1494 LC_ALL ----> default_loc codepage 1495-------------------------------------------------------- 1496 ab.CD ab CD 1497 ab@CD ab__CD - 1498 ab@CD.EF ab__CD EF 1499 1500 ab_CD.EF@GH ab_CD_GH EF 1501 1502Some 'improper' ways to do the same as above: 1503 ! ab_CD@GH.EF ab_CD_GH EF 1504 ! ab_CD.EF@GH.IJ ab_CD_GH EF 1505 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF 1506 1507 _CD@GH _CD_GH - 1508 _CD.EF@GH _CD_GH EF 1509 1510The variant cannot have dots in it. 1511The 'rightmost' variant (@xxx) wins. 1512The leftmost codepage (.xxx) wins. 1513*/ 1514 char *correctedPOSIXLocale = 0; 1515 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1516 const char *p; 1517 const char *q; 1518 int32_t len; 1519 1520 /* Format: (no spaces) 1521 ll [ _CC ] [ . MM ] [ @ VV] 1522 1523 l = lang, C = ctry, M = charmap, V = variant 1524 */ 1525 1526 if (gCorrectedPOSIXLocale != NULL) { 1527 return gCorrectedPOSIXLocale; 1528 } 1529 1530 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1531 /* assume new locale can't be larger than old one? */ 1532 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1533 /* Exit on memory allocation error. */ 1534 if (correctedPOSIXLocale == NULL) { 1535 return NULL; 1536 } 1537 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1538 correctedPOSIXLocale[p-posixID] = 0; 1539 1540 /* do not copy after the @ */ 1541 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1542 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1543 } 1544 } 1545 1546 /* Note that we scan the *uncorrected* ID. */ 1547 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1548 if (correctedPOSIXLocale == NULL) { 1549 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1)); 1550 /* Exit on memory allocation error. */ 1551 if (correctedPOSIXLocale == NULL) { 1552 return NULL; 1553 } 1554 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1555 correctedPOSIXLocale[p-posixID] = 0; 1556 } 1557 p++; 1558 1559 /* Take care of any special cases here.. */ 1560 if (!uprv_strcmp(p, "nynorsk")) { 1561 p = "NY"; 1562 /* Don't worry about no__NY. In practice, it won't appear. */ 1563 } 1564 1565 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1566 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1567 } 1568 else { 1569 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1570 } 1571 1572 if ((q = uprv_strchr(p, '.')) != NULL) { 1573 /* How big will the resulting string be? */ 1574 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1575 uprv_strncat(correctedPOSIXLocale, p, q-p); 1576 correctedPOSIXLocale[len] = 0; 1577 } 1578 else { 1579 /* Anything following the @ sign */ 1580 uprv_strcat(correctedPOSIXLocale, p); 1581 } 1582 1583 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1584 * How about 'russian' -> 'ru'? 1585 * Many of the other locales using ISO codes will be handled by the 1586 * canonicalization functions in uloc_getDefault. 1587 */ 1588 } 1589 1590 /* Was a correction made? */ 1591 if (correctedPOSIXLocale != NULL) { 1592 posixID = correctedPOSIXLocale; 1593 } 1594 else { 1595 /* copy it, just in case the original pointer goes away. See j2395 */ 1596 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1597 /* Exit on memory allocation error. */ 1598 if (correctedPOSIXLocale == NULL) { 1599 return NULL; 1600 } 1601 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1602 } 1603 1604 if (gCorrectedPOSIXLocale == NULL) { 1605 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1606 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1607 correctedPOSIXLocale = NULL; 1608 } 1609 1610 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. */ 1611 uprv_free(correctedPOSIXLocale); 1612 } 1613 1614 return posixID; 1615 1616#elif U_PLATFORM_USES_ONLY_WIN32_API 1617#define POSIX_LOCALE_CAPACITY 64 1618 UErrorCode status = U_ZERO_ERROR; 1619 char *correctedPOSIXLocale = 0; 1620 1621 if (gCorrectedPOSIXLocale != NULL) { 1622 return gCorrectedPOSIXLocale; 1623 } 1624 1625 LCID id = GetThreadLocale(); 1626 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); 1627 if (correctedPOSIXLocale) { 1628 int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); 1629 if (U_SUCCESS(status)) { 1630 *(correctedPOSIXLocale + posixLen) = 0; 1631 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1632 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1633 } else { 1634 uprv_free(correctedPOSIXLocale); 1635 } 1636 } 1637 1638 if (gCorrectedPOSIXLocale == NULL) { 1639 return "en_US"; 1640 } 1641 return gCorrectedPOSIXLocale; 1642 1643#elif U_PLATFORM == U_PF_OS400 1644 /* locales are process scoped and are by definition thread safe */ 1645 static char correctedLocale[64]; 1646 const char *localeID = getenv("LC_ALL"); 1647 char *p; 1648 1649 if (localeID == NULL) 1650 localeID = getenv("LANG"); 1651 if (localeID == NULL) 1652 localeID = setlocale(LC_ALL, NULL); 1653 /* Make sure we have something... */ 1654 if (localeID == NULL) 1655 return "en_US_POSIX"; 1656 1657 /* Extract the locale name from the path. */ 1658 if((p = uprv_strrchr(localeID, '/')) != NULL) 1659 { 1660 /* Increment p to start of locale name. */ 1661 p++; 1662 localeID = p; 1663 } 1664 1665 /* Copy to work location. */ 1666 uprv_strcpy(correctedLocale, localeID); 1667 1668 /* Strip off the '.locale' extension. */ 1669 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1670 *p = 0; 1671 } 1672 1673 /* Upper case the locale name. */ 1674 T_CString_toUpperCase(correctedLocale); 1675 1676 /* See if we are using the POSIX locale. Any of the 1677 * following are equivalent and use the same QLGPGCMA 1678 * (POSIX) locale. 1679 * QLGPGCMA2 means UCS2 1680 * QLGPGCMA_4 means UTF-32 1681 * QLGPGCMA_8 means UTF-8 1682 */ 1683 if ((uprv_strcmp("C", correctedLocale) == 0) || 1684 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1685 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1686 { 1687 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1688 } 1689 else 1690 { 1691 int16_t LocaleLen; 1692 1693 /* Lower case the lang portion. */ 1694 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1695 { 1696 *p = uprv_tolower(*p); 1697 } 1698 1699 /* Adjust for Euro. After '_E' add 'URO'. */ 1700 LocaleLen = uprv_strlen(correctedLocale); 1701 if (correctedLocale[LocaleLen - 2] == '_' && 1702 correctedLocale[LocaleLen - 1] == 'E') 1703 { 1704 uprv_strcat(correctedLocale, "URO"); 1705 } 1706 1707 /* If using Lotus-based locale then convert to 1708 * equivalent non Lotus. 1709 */ 1710 else if (correctedLocale[LocaleLen - 2] == '_' && 1711 correctedLocale[LocaleLen - 1] == 'L') 1712 { 1713 correctedLocale[LocaleLen - 2] = 0; 1714 } 1715 1716 /* There are separate simplified and traditional 1717 * locales called zh_HK_S and zh_HK_T. 1718 */ 1719 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1720 { 1721 uprv_strcpy(correctedLocale, "zh_HK"); 1722 } 1723 1724 /* A special zh_CN_GBK locale... 1725 */ 1726 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1727 { 1728 uprv_strcpy(correctedLocale, "zh_CN"); 1729 } 1730 1731 } 1732 1733 return correctedLocale; 1734#endif 1735 1736} 1737 1738#if !U_CHARSET_IS_UTF8 1739#if U_POSIX_LOCALE 1740/* 1741Due to various platform differences, one platform may specify a charset, 1742when they really mean a different charset. Remap the names so that they are 1743compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1744here. Before adding anything to this function, please consider adding unique 1745names to the ICU alias table in the data directory. 1746*/ 1747static const char* 1748remapPlatformDependentCodepage(const char *locale, const char *name) { 1749 if (locale != NULL && *locale == 0) { 1750 /* Make sure that an empty locale is handled the same way. */ 1751 locale = NULL; 1752 } 1753 if (name == NULL) { 1754 return NULL; 1755 } 1756#if U_PLATFORM == U_PF_AIX 1757 if (uprv_strcmp(name, "IBM-943") == 0) { 1758 /* Use the ASCII compatible ibm-943 */ 1759 name = "Shift-JIS"; 1760 } 1761 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1762 /* Use the windows-1252 that contains the Euro */ 1763 name = "IBM-5348"; 1764 } 1765#elif U_PLATFORM == U_PF_SOLARIS 1766 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1767 /* Solaris underspecifies the "EUC" name. */ 1768 if (uprv_strcmp(locale, "zh_CN") == 0) { 1769 name = "EUC-CN"; 1770 } 1771 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1772 name = "EUC-TW"; 1773 } 1774 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1775 name = "EUC-KR"; 1776 } 1777 } 1778 else if (uprv_strcmp(name, "eucJP") == 0) { 1779 /* 1780 ibm-954 is the best match. 1781 ibm-33722 is the default for eucJP (similar to Windows). 1782 */ 1783 name = "eucjis"; 1784 } 1785 else if (uprv_strcmp(name, "646") == 0) { 1786 /* 1787 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1788 * ISO-8859-1 instead of US-ASCII(646). 1789 */ 1790 name = "ISO-8859-1"; 1791 } 1792#elif U_PLATFORM_IS_DARWIN_BASED 1793 if (locale == NULL && *name == 0) { 1794 /* 1795 No locale was specified, and an empty name was passed in. 1796 This usually indicates that nl_langinfo didn't return valid information. 1797 Mac OS X uses UTF-8 by default (especially the locale data and console). 1798 */ 1799 name = "UTF-8"; 1800 } 1801 else if (uprv_strcmp(name, "CP949") == 0) { 1802 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1803 name = "EUC-KR"; 1804 } 1805 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1806 /* 1807 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1808 */ 1809 name = "UTF-8"; 1810 } 1811#elif U_PLATFORM == U_PF_BSD 1812 if (uprv_strcmp(name, "CP949") == 0) { 1813 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1814 name = "EUC-KR"; 1815 } 1816#elif U_PLATFORM == U_PF_HPUX 1817 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1818 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1819 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1820 name = "hkbig5"; 1821 } 1822 else if (uprv_strcmp(name, "eucJP") == 0) { 1823 /* 1824 ibm-1350 is the best match, but unavailable. 1825 ibm-954 is mostly a superset of ibm-1350. 1826 ibm-33722 is the default for eucJP (similar to Windows). 1827 */ 1828 name = "eucjis"; 1829 } 1830#elif U_PLATFORM == U_PF_LINUX 1831 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1832 /* Linux underspecifies the "EUC" name. */ 1833 if (uprv_strcmp(locale, "korean") == 0) { 1834 name = "EUC-KR"; 1835 } 1836 else if (uprv_strcmp(locale, "japanese") == 0) { 1837 /* See comment below about eucJP */ 1838 name = "eucjis"; 1839 } 1840 } 1841 else if (uprv_strcmp(name, "eucjp") == 0) { 1842 /* 1843 ibm-1350 is the best match, but unavailable. 1844 ibm-954 is mostly a superset of ibm-1350. 1845 ibm-33722 is the default for eucJP (similar to Windows). 1846 */ 1847 name = "eucjis"; 1848 } 1849 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && 1850 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { 1851 /* 1852 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1853 */ 1854 name = "UTF-8"; 1855 } 1856 /* 1857 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of 1858 * it by falling back to 'US-ASCII' when NULL is returned from this 1859 * function. So, we don't have to worry about it here. 1860 */ 1861#endif 1862 /* return NULL when "" is passed in */ 1863 if (*name == 0) { 1864 name = NULL; 1865 } 1866 return name; 1867} 1868 1869static const char* 1870getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) 1871{ 1872 char localeBuf[100]; 1873 const char *name = NULL; 1874 char *variant = NULL; 1875 1876 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { 1877 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); 1878 uprv_strncpy(localeBuf, localeName, localeCapacity); 1879 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ 1880 name = uprv_strncpy(buffer, name+1, buffCapacity); 1881 buffer[buffCapacity-1] = 0; /* ensure NULL termination */ 1882 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { 1883 *variant = 0; 1884 } 1885 name = remapPlatformDependentCodepage(localeBuf, name); 1886 } 1887 return name; 1888} 1889#endif 1890 1891static const char* 1892int_getDefaultCodepage() 1893{ 1894#if U_PLATFORM == U_PF_OS400 1895 uint32_t ccsid = 37; /* Default to ibm-37 */ 1896 static char codepage[64]; 1897 Qwc_JOBI0400_t jobinfo; 1898 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ 1899 1900 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", 1901 "* ", " ", &error); 1902 1903 if (error.Bytes_Available == 0) { 1904 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1905 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1906 } 1907 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1908 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1909 } 1910 /* else use the default */ 1911 } 1912 sprintf(codepage,"ibm-%d", ccsid); 1913 return codepage; 1914 1915#elif U_PLATFORM == U_PF_OS390 1916 static char codepage[64]; 1917 1918 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1919 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1920 codepage[63] = 0; /* NULL terminate */ 1921 1922 return codepage; 1923 1924#elif U_PLATFORM_USES_ONLY_WIN32_API 1925 static char codepage[64]; 1926 sprintf(codepage, "windows-%d", GetACP()); 1927 return codepage; 1928 1929#elif U_POSIX_LOCALE 1930 static char codesetName[100]; 1931 const char *localeName = NULL; 1932 const char *name = NULL; 1933 1934 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1935 uprv_memset(codesetName, 0, sizeof(codesetName)); 1936 /* On Solaris nl_langinfo returns C locale values unless setlocale 1937 * was called earlier. 1938 */ 1939#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) 1940 /* When available, check nl_langinfo first because it usually gives more 1941 useful names. It depends on LC_CTYPE. 1942 nl_langinfo may use the same buffer as setlocale. */ 1943 { 1944 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1945#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED 1946 /* 1947 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1948 * instead of ASCII. 1949 */ 1950 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1951 codeset = remapPlatformDependentCodepage(localeName, codeset); 1952 } else 1953#endif 1954 { 1955 codeset = remapPlatformDependentCodepage(NULL, codeset); 1956 } 1957 1958 if (codeset != NULL) { 1959 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1960 codesetName[sizeof(codesetName)-1] = 0; 1961 return codesetName; 1962 } 1963 } 1964#endif 1965 1966 /* Use setlocale in a nice way, and then check some environment variables. 1967 Maybe the application used setlocale already. 1968 */ 1969 uprv_memset(codesetName, 0, sizeof(codesetName)); 1970 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1971 if (name) { 1972 /* if we can find the codeset name from setlocale, return that. */ 1973 return name; 1974 } 1975 1976 if (*codesetName == 0) 1977 { 1978 /* Everything failed. Return US ASCII (ISO 646). */ 1979 (void)uprv_strcpy(codesetName, "US-ASCII"); 1980 } 1981 return codesetName; 1982#else 1983 return "US-ASCII"; 1984#endif 1985} 1986 1987 1988U_CAPI const char* U_EXPORT2 1989uprv_getDefaultCodepage() 1990{ 1991 static char const *name = NULL; 1992 umtx_lock(NULL); 1993 if (name == NULL) { 1994 name = int_getDefaultCodepage(); 1995 } 1996 umtx_unlock(NULL); 1997 return name; 1998} 1999#endif /* !U_CHARSET_IS_UTF8 */ 2000 2001 2002/* end of platform-specific implementation -------------- */ 2003 2004/* version handling --------------------------------------------------------- */ 2005 2006U_CAPI void U_EXPORT2 2007u_versionFromString(UVersionInfo versionArray, const char *versionString) { 2008 char *end; 2009 uint16_t part=0; 2010 2011 if(versionArray==NULL) { 2012 return; 2013 } 2014 2015 if(versionString!=NULL) { 2016 for(;;) { 2017 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 2018 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 2019 break; 2020 } 2021 versionString=end+1; 2022 } 2023 } 2024 2025 while(part<U_MAX_VERSION_LENGTH) { 2026 versionArray[part++]=0; 2027 } 2028} 2029 2030U_CAPI void U_EXPORT2 2031u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2032 if(versionArray!=NULL && versionString!=NULL) { 2033 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2034 int32_t len = u_strlen(versionString); 2035 if(len>U_MAX_VERSION_STRING_LENGTH) { 2036 len = U_MAX_VERSION_STRING_LENGTH; 2037 } 2038 u_UCharsToChars(versionString, versionChars, len); 2039 versionChars[len]=0; 2040 u_versionFromString(versionArray, versionChars); 2041 } 2042} 2043 2044U_CAPI void U_EXPORT2 2045u_versionToString(const UVersionInfo versionArray, char *versionString) { 2046 uint16_t count, part; 2047 uint8_t field; 2048 2049 if(versionString==NULL) { 2050 return; 2051 } 2052 2053 if(versionArray==NULL) { 2054 versionString[0]=0; 2055 return; 2056 } 2057 2058 /* count how many fields need to be written */ 2059 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2060 } 2061 2062 if(count <= 1) { 2063 count = 2; 2064 } 2065 2066 /* write the first part */ 2067 /* write the decimal field value */ 2068 field=versionArray[0]; 2069 if(field>=100) { 2070 *versionString++=(char)('0'+field/100); 2071 field%=100; 2072 } 2073 if(field>=10) { 2074 *versionString++=(char)('0'+field/10); 2075 field%=10; 2076 } 2077 *versionString++=(char)('0'+field); 2078 2079 /* write the following parts */ 2080 for(part=1; part<count; ++part) { 2081 /* write a dot first */ 2082 *versionString++=U_VERSION_DELIMITER; 2083 2084 /* write the decimal field value */ 2085 field=versionArray[part]; 2086 if(field>=100) { 2087 *versionString++=(char)('0'+field/100); 2088 field%=100; 2089 } 2090 if(field>=10) { 2091 *versionString++=(char)('0'+field/10); 2092 field%=10; 2093 } 2094 *versionString++=(char)('0'+field); 2095 } 2096 2097 /* NUL-terminate */ 2098 *versionString=0; 2099} 2100 2101U_CAPI void U_EXPORT2 2102u_getVersion(UVersionInfo versionArray) { 2103 (void)copyright; // Suppress unused variable warning from clang. 2104 u_versionFromString(versionArray, U_ICU_VERSION); 2105} 2106 2107/** 2108 * icucfg.h dependent code 2109 */ 2110 2111#if U_ENABLE_DYLOAD 2112 2113#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API 2114 2115#if HAVE_DLFCN_H 2116 2117#ifdef __MVS__ 2118#ifndef __SUSV3 2119#define __SUSV3 1 2120#endif 2121#endif 2122#include <dlfcn.h> 2123#endif 2124 2125U_INTERNAL void * U_EXPORT2 2126uprv_dl_open(const char *libName, UErrorCode *status) { 2127 void *ret = NULL; 2128 if(U_FAILURE(*status)) return ret; 2129 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2130 if(ret==NULL) { 2131#ifdef U_TRACE_DYLOAD 2132 printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); 2133#endif 2134 *status = U_MISSING_RESOURCE_ERROR; 2135 } 2136 return ret; 2137} 2138 2139U_INTERNAL void U_EXPORT2 2140uprv_dl_close(void *lib, UErrorCode *status) { 2141 if(U_FAILURE(*status)) return; 2142 dlclose(lib); 2143} 2144 2145U_INTERNAL UVoidFunction* U_EXPORT2 2146uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2147 union { 2148 UVoidFunction *fp; 2149 void *vp; 2150 } uret; 2151 uret.fp = NULL; 2152 if(U_FAILURE(*status)) return uret.fp; 2153 uret.vp = dlsym(lib, sym); 2154 if(uret.vp == NULL) { 2155#ifdef U_TRACE_DYLOAD 2156 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); 2157#endif 2158 *status = U_MISSING_RESOURCE_ERROR; 2159 } 2160 return uret.fp; 2161} 2162 2163#else 2164 2165/* null (nonexistent) implementation. */ 2166 2167U_INTERNAL void * U_EXPORT2 2168uprv_dl_open(const char *libName, UErrorCode *status) { 2169 if(U_FAILURE(*status)) return NULL; 2170 *status = U_UNSUPPORTED_ERROR; 2171 return NULL; 2172} 2173 2174U_INTERNAL void U_EXPORT2 2175uprv_dl_close(void *lib, UErrorCode *status) { 2176 if(U_FAILURE(*status)) return; 2177 *status = U_UNSUPPORTED_ERROR; 2178 return; 2179} 2180 2181 2182U_INTERNAL UVoidFunction* U_EXPORT2 2183uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2184 if(U_SUCCESS(*status)) { 2185 *status = U_UNSUPPORTED_ERROR; 2186 } 2187 return (UVoidFunction*)NULL; 2188} 2189 2190 2191 2192#endif 2193 2194#elif U_PLATFORM_USES_ONLY_WIN32_API 2195 2196U_INTERNAL void * U_EXPORT2 2197uprv_dl_open(const char *libName, UErrorCode *status) { 2198 HMODULE lib = NULL; 2199 2200 if(U_FAILURE(*status)) return NULL; 2201 2202 lib = LoadLibraryA(libName); 2203 2204 if(lib==NULL) { 2205 *status = U_MISSING_RESOURCE_ERROR; 2206 } 2207 2208 return (void*)lib; 2209} 2210 2211U_INTERNAL void U_EXPORT2 2212uprv_dl_close(void *lib, UErrorCode *status) { 2213 HMODULE handle = (HMODULE)lib; 2214 if(U_FAILURE(*status)) return; 2215 2216 FreeLibrary(handle); 2217 2218 return; 2219} 2220 2221 2222U_INTERNAL UVoidFunction* U_EXPORT2 2223uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2224 HMODULE handle = (HMODULE)lib; 2225 UVoidFunction* addr = NULL; 2226 2227 if(U_FAILURE(*status) || lib==NULL) return NULL; 2228 2229 addr = (UVoidFunction*)GetProcAddress(handle, sym); 2230 2231 if(addr==NULL) { 2232 DWORD lastError = GetLastError(); 2233 if(lastError == ERROR_PROC_NOT_FOUND) { 2234 *status = U_MISSING_RESOURCE_ERROR; 2235 } else { 2236 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2237 } 2238 } 2239 2240 return addr; 2241} 2242 2243 2244#else 2245 2246/* No dynamic loading set. */ 2247 2248U_INTERNAL void * U_EXPORT2 2249uprv_dl_open(const char *libName, UErrorCode *status) { 2250 (void)libName; 2251 if(U_FAILURE(*status)) return NULL; 2252 *status = U_UNSUPPORTED_ERROR; 2253 return NULL; 2254} 2255 2256U_INTERNAL void U_EXPORT2 2257uprv_dl_close(void *lib, UErrorCode *status) { 2258 (void)lib; 2259 if(U_FAILURE(*status)) return; 2260 *status = U_UNSUPPORTED_ERROR; 2261 return; 2262} 2263 2264 2265U_INTERNAL UVoidFunction* U_EXPORT2 2266uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { 2267 (void)lib; 2268 (void)sym; 2269 if(U_SUCCESS(*status)) { 2270 *status = U_UNSUPPORTED_ERROR; 2271 } 2272 return (UVoidFunction*)NULL; 2273} 2274 2275#endif /* U_ENABLE_DYLOAD */ 2276 2277/* 2278 * Hey, Emacs, please set the following: 2279 * 2280 * Local Variables: 2281 * indent-tabs-mode: nil 2282 * End: 2283 * 2284 */ 2285