1/* 2****************************************************************************** 3* 4* Copyright (C) 1997-2010, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* 9* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) 10* 11* Date Name Description 12* 04/14/97 aliu Creation. 13* 04/24/97 aliu Added getDefaultDataDirectory() and 14* getDefaultLocaleID(). 15* 04/28/97 aliu Rewritten to assume Unix and apply general methods 16* for assumed case. Non-UNIX platforms must be 17* special-cased. Rewrote numeric methods dealing 18* with NaN and Infinity to be platform independent 19* over all IEEE 754 platforms. 20* 05/13/97 aliu Restored sign of timezone 21* (semantics are hours West of GMT) 22* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, 23* nextDouble.. 24* 07/22/98 stephen Added remainder, max, min, trunc 25* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity 26* 08/24/98 stephen Added longBitsFromDouble 27* 09/08/98 stephen Minor changes for Mac Port 28* 03/02/99 stephen Removed openFile(). Added AS400 support. 29* Fixed EBCDIC tables 30* 04/15/99 stephen Converted to C. 31* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). 32* 08/04/99 jeffrey R. Added OS/2 changes 33* 11/15/99 helena Integrated S/390 IEEE support. 34* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID 35* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage 36* 01/03/08 Steven L. Fake Time Support 37****************************************************************************** 38*/ 39 40/* Define _XOPEN_SOURCE for access to POSIX functions. */ 41#ifdef _XOPEN_SOURCE 42 /* Use the predefined value. 
*/ 43#else 44 /* 45 * Version 6.0: 46 * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition) 47 * also known as 48 * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03) 49 */ 50# define _XOPEN_SOURCE 600 51#endif 52 53/* Make sure things like readlink and such functions work. 54Poorly upgraded Solaris machines can't have this defined. 55Cleanly installed Solaris can use this #define. 56*/ 57#if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__)) 58#define _XOPEN_SOURCE_EXTENDED 1 59#endif 60 61/* include ICU headers */ 62#include "unicode/utypes.h" 63#include "unicode/putil.h" 64#include "unicode/ustring.h" 65#include "putilimp.h" 66#include "uassert.h" 67#include "umutex.h" 68#include "cmemory.h" 69#include "cstring.h" 70#include "locmap.h" 71#include "ucln_cmn.h" 72 73/* Include standard headers. */ 74#include <stdio.h> 75#include <stdlib.h> 76#include <string.h> 77#include <math.h> 78#include <locale.h> 79#include <float.h> 80#include <time.h> 81 82/* include system headers */ 83#ifdef U_WINDOWS 84# define WIN32_LEAN_AND_MEAN 85# define VC_EXTRALEAN 86# define NOUSER 87# define NOSERVICE 88# define NOIME 89# define NOMCX 90# include <windows.h> 91# include "wintz.h" 92#elif defined(U_CYGWIN) && defined(__STRICT_ANSI__) 93/* tzset isn't defined in strict ANSI on Cygwin. 
*/ 94# undef __STRICT_ANSI__ 95#elif defined(OS400) 96# include <float.h> 97# include <qusec.h> /* error code structure */ 98# include <qusrjobi.h> 99# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ 100# include <mih/testptr.h> /* For uprv_maximumPtr */ 101#elif defined(XP_MAC) 102# include <Files.h> 103# include <IntlResources.h> 104# include <Script.h> 105# include <Folders.h> 106# include <MacTypes.h> 107# include <TextUtils.h> 108# define ICU_NO_USER_DATA_OVERRIDE 1 109#elif defined(OS390) 110#include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ 111#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD) 112#include <limits.h> 113#include <unistd.h> 114#elif defined(U_QNX) 115#include <sys/neutrino.h> 116#elif defined(U_SOLARIS) 117# ifndef _XPG4_2 118# define _XPG4_2 119# endif 120#endif 121 122 123#if defined(U_DARWIN) 124#include <TargetConditionals.h> 125#endif 126 127#ifndef U_WINDOWS 128#include <sys/time.h> 129#endif 130 131/* 132 * Only include langinfo.h if we have a way to get the codeset. If we later 133 * depend on more feature, we can test on U_HAVE_NL_LANGINFO. 134 * 135 */ 136 137#if U_HAVE_NL_LANGINFO_CODESET 138#include <langinfo.h> 139#endif 140 141/** 142 * Simple things (presence of functions, etc) should just go in configure.in and be added to 143 * icucfg.h via autoheader. 144 */ 145#if defined(HAVE_CONFIG_H) 146#include "icucfg.h" 147#endif 148 149/* Define the extension for data files, again... */ 150#define DATA_TYPE "dat" 151 152/* Leave this copyright notice here! */ 153static const char copyright[] = U_COPYRIGHT_STRING; 154 155/* floating point implementations ------------------------------------------- */ 156 157/* We return QNAN rather than SNAN*/ 158#define SIGN 0x80000000U 159 160/* Make it easy to define certain types of constants */ 161typedef union { 162 int64_t i64; /* This must be defined first in order to allow the initialization to work. 
This is a C89 feature. */ 163 double d64; 164} BitPatternConversion; 165static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; 166static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; 167 168/*--------------------------------------------------------------------------- 169 Platform utilities 170 Our general strategy is to assume we're on a POSIX platform. Platforms which 171 are non-POSIX must declare themselves so. The default POSIX implementation 172 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related 173 functions). 174 ---------------------------------------------------------------------------*/ 175 176#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) 177# undef U_POSIX_LOCALE 178#else 179# define U_POSIX_LOCALE 1 180#endif 181 182/* 183 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble 184 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). 185*/ 186#if !IEEE_754 187static char* 188u_topNBytesOfDouble(double* d, int n) 189{ 190#if U_IS_BIG_ENDIAN 191 return (char*)d; 192#else 193 return (char*)(d + 1) - n; 194#endif 195} 196 197static char* 198u_bottomNBytesOfDouble(double* d, int n) 199{ 200#if U_IS_BIG_ENDIAN 201 return (char*)(d + 1) - n; 202#else 203 return (char*)d; 204#endif 205} 206#endif /* !IEEE_754 */ 207 208#if IEEE_754 209static UBool 210u_signBit(double d) { 211 uint8_t hiByte; 212#if U_IS_BIG_ENDIAN 213 hiByte = *(uint8_t *)&d; 214#else 215 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); 216#endif 217 return (hiByte & 0x80) != 0; 218} 219#endif 220 221 222 223#if defined (U_DEBUG_FAKETIME) 224/* Override the clock to test things without having to move the system clock. 
225 * Assumes POSIX gettimeofday() will function 226 */ 227UDate fakeClock_t0 = 0; /** Time to start the clock from **/ 228UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ 229UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ 230static UMTX fakeClockMutex = NULL; 231 232static UDate getUTCtime_real() { 233 struct timeval posixTime; 234 gettimeofday(&posixTime, NULL); 235 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 236} 237 238static UDate getUTCtime_fake() { 239 umtx_lock(&fakeClockMutex); 240 if(!fakeClock_set) { 241 UDate real = getUTCtime_real(); 242 const char *fake_start = getenv("U_FAKETIME_START"); 243 if((fake_start!=NULL) && (fake_start[0]!=0)) { 244 sscanf(fake_start,"%lf",&fakeClock_t0); 245 fakeClock_dt = fakeClock_t0 - real; 246 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" 247 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", 248 fakeClock_t0, fake_start, fakeClock_dt, real); 249 } else { 250 fakeClock_dt = 0; 251 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" 252 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); 253 } 254 fakeClock_set = TRUE; 255 } 256 umtx_unlock(&fakeClockMutex); 257 258 return getUTCtime_real() + fakeClock_dt; 259} 260#endif 261 262#if defined(U_WINDOWS) 263typedef union { 264 int64_t int64; 265 FILETIME fileTime; 266} FileTimeConversion; /* This is like a ULARGE_INTEGER */ 267 268/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ 269#define EPOCH_BIAS INT64_C(116444736000000000) 270#define HECTONANOSECOND_PER_MILLISECOND 10000 271 272#endif 273 274/*--------------------------------------------------------------------------- 275 Universal Implementations 276 These are designed to work on all platforms. 
Try these, and if they 277 don't work on your platform, then special case your platform with new 278 implementations. 279---------------------------------------------------------------------------*/ 280 281U_CAPI UDate U_EXPORT2 282uprv_getUTCtime() 283{ 284#if defined(U_DEBUG_FAKETIME) 285 return getUTCtime_fake(); /* Hook for overriding the clock */ 286#else 287 return uprv_getRawUTCtime(); 288#endif 289} 290 291/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ 292U_CAPI UDate U_EXPORT2 293uprv_getRawUTCtime() 294{ 295#if defined(XP_MAC) 296 time_t t, t1, t2; 297 struct tm tmrec; 298 299 uprv_memset( &tmrec, 0, sizeof(tmrec) ); 300 tmrec.tm_year = 70; 301 tmrec.tm_mon = 0; 302 tmrec.tm_mday = 1; 303 t1 = mktime(&tmrec); /* seconds of 1/1/1970*/ 304 305 time(&t); 306 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 307 t2 = mktime(&tmrec); /* seconds of current GMT*/ 308 return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND; /* GMT (or UTC) in seconds since 1970*/ 309#elif defined(U_WINDOWS) 310 311 FileTimeConversion winTime; 312 GetSystemTimeAsFileTime(&winTime.fileTime); 313 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); 314#else 315 316#if defined(HAVE_GETTIMEOFDAY) 317 struct timeval posixTime; 318 gettimeofday(&posixTime, NULL); 319 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); 320#else 321 time_t epochtime; 322 time(&epochtime); 323 return (UDate)epochtime * U_MILLIS_PER_SECOND; 324#endif 325 326#endif 327} 328 329/*----------------------------------------------------------------------------- 330 IEEE 754 331 These methods detect and return NaN and infinity values for doubles 332 conforming to IEEE 754. Platforms which support this standard include X86, 333 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. 334 If this doesn't work on your platform, you have non-IEEE floating-point, and 335 will need to code your own versions. 
A naive implementation is to return 0.0 336 for getNaN and getInfinity, and false for isNaN and isInfinite. 337 ---------------------------------------------------------------------------*/ 338 339U_CAPI UBool U_EXPORT2 340uprv_isNaN(double number) 341{ 342#if IEEE_754 343 BitPatternConversion convertedNumber; 344 convertedNumber.d64 = number; 345 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ 346 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); 347 348#elif defined(OS390) 349 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 350 sizeof(uint32_t)); 351 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 352 sizeof(uint32_t)); 353 354 return ((highBits & 0x7F080000L) == 0x7F080000L) && 355 (lowBits == 0x00000000L); 356 357#else 358 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 359 /* you'll need to replace this default implementation with what's correct*/ 360 /* for your platform.*/ 361 return number != number; 362#endif 363} 364 365U_CAPI UBool U_EXPORT2 366uprv_isInfinite(double number) 367{ 368#if IEEE_754 369 BitPatternConversion convertedNumber; 370 convertedNumber.d64 = number; 371 /* Infinity is exactly 0x7FF0000000000000U. 
*/ 372 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); 373#elif defined(OS390) 374 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 375 sizeof(uint32_t)); 376 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, 377 sizeof(uint32_t)); 378 379 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); 380 381#else 382 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 383 /* value, you'll need to replace this default implementation with what's*/ 384 /* correct for your platform.*/ 385 return number == (2.0 * number); 386#endif 387} 388 389U_CAPI UBool U_EXPORT2 390uprv_isPositiveInfinity(double number) 391{ 392#if IEEE_754 || defined(OS390) 393 return (UBool)(number > 0 && uprv_isInfinite(number)); 394#else 395 return uprv_isInfinite(number); 396#endif 397} 398 399U_CAPI UBool U_EXPORT2 400uprv_isNegativeInfinity(double number) 401{ 402#if IEEE_754 || defined(OS390) 403 return (UBool)(number < 0 && uprv_isInfinite(number)); 404 405#else 406 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, 407 sizeof(uint32_t)); 408 return((highBits & SIGN) && uprv_isInfinite(number)); 409 410#endif 411} 412 413U_CAPI double U_EXPORT2 414uprv_getNaN() 415{ 416#if IEEE_754 || defined(OS390) 417 return gNan.d64; 418#else 419 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ 420 /* you'll need to replace this default implementation with what's correct*/ 421 /* for your platform.*/ 422 return 0.0; 423#endif 424} 425 426U_CAPI double U_EXPORT2 427uprv_getInfinity() 428{ 429#if IEEE_754 || defined(OS390) 430 return gInf.d64; 431#else 432 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ 433 /* value, you'll need to replace this default implementation with what's*/ 434 /* correct for your platform.*/ 435 return 0.0; 436#endif 437} 438 439U_CAPI double U_EXPORT2 440uprv_floor(double x) 441{ 442 return floor(x); 443} 444 445U_CAPI double U_EXPORT2 
446uprv_ceil(double x) 447{ 448 return ceil(x); 449} 450 451U_CAPI double U_EXPORT2 452uprv_round(double x) 453{ 454 return uprv_floor(x + 0.5); 455} 456 457U_CAPI double U_EXPORT2 458uprv_fabs(double x) 459{ 460 return fabs(x); 461} 462 463U_CAPI double U_EXPORT2 464uprv_modf(double x, double* y) 465{ 466 return modf(x, y); 467} 468 469U_CAPI double U_EXPORT2 470uprv_fmod(double x, double y) 471{ 472 return fmod(x, y); 473} 474 475U_CAPI double U_EXPORT2 476uprv_pow(double x, double y) 477{ 478 /* This is declared as "double pow(double x, double y)" */ 479 return pow(x, y); 480} 481 482U_CAPI double U_EXPORT2 483uprv_pow10(int32_t x) 484{ 485 return pow(10.0, (double)x); 486} 487 488U_CAPI double U_EXPORT2 489uprv_fmax(double x, double y) 490{ 491#if IEEE_754 492 /* first handle NaN*/ 493 if(uprv_isNaN(x) || uprv_isNaN(y)) 494 return uprv_getNaN(); 495 496 /* check for -0 and 0*/ 497 if(x == 0.0 && y == 0.0 && u_signBit(x)) 498 return y; 499 500#endif 501 502 /* this should work for all flt point w/o NaN and Inf special cases */ 503 return (x > y ? x : y); 504} 505 506U_CAPI double U_EXPORT2 507uprv_fmin(double x, double y) 508{ 509#if IEEE_754 510 /* first handle NaN*/ 511 if(uprv_isNaN(x) || uprv_isNaN(y)) 512 return uprv_getNaN(); 513 514 /* check for -0 and 0*/ 515 if(x == 0.0 && y == 0.0 && u_signBit(y)) 516 return y; 517 518#endif 519 520 /* this should work for all flt point w/o NaN and Inf special cases */ 521 return (x > y ? y : x); 522} 523 524/** 525 * Truncates the given double. 526 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 527 * This is different than calling floor() or ceil(): 528 * floor(3.3) = 3, floor(-3.3) = -4 529 * ceil(3.3) = 4, ceil(-3.3) = -3 530 */ 531U_CAPI double U_EXPORT2 532uprv_trunc(double d) 533{ 534#if IEEE_754 535 /* handle error cases*/ 536 if(uprv_isNaN(d)) 537 return uprv_getNaN(); 538 if(uprv_isInfinite(d)) 539 return uprv_getInfinity(); 540 541 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. 
*/ 542 return ceil(d); 543 else 544 return floor(d); 545 546#else 547 return d >= 0 ? floor(d) : ceil(d); 548 549#endif 550} 551 552/** 553 * Return the largest positive number that can be represented by an integer 554 * type of arbitrary bit length. 555 */ 556U_CAPI double U_EXPORT2 557uprv_maxMantissa(void) 558{ 559 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; 560} 561 562U_CAPI double U_EXPORT2 563uprv_log(double d) 564{ 565 return log(d); 566} 567 568U_CAPI void * U_EXPORT2 569uprv_maximumPtr(void * base) 570{ 571#if defined(OS400) 572 /* 573 * With the provided function we should never be out of range of a given segment 574 * (a traditional/typical segment that is). Our segments have 5 bytes for the 575 * id and 3 bytes for the offset. The key is that the casting takes care of 576 * only retrieving the offset portion minus x1000. Hence, the smallest offset 577 * seen in a program is x001000 and when casted to an int would be 0. 578 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. 579 * 580 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is 581 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). 582 * This function determines the activation based on the pointer that is passed in and 583 * calculates the appropriate maximum available size for 584 * each pointer type (TERASPACE and non-TERASPACE) 585 * 586 * Unlike other operating systems, the pointer model isn't determined at 587 * compile time on i5/OS. 
588 */ 589 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { 590 /* if it is a TERASPACE pointer the max is 2GB - 4k */ 591 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); 592 } 593 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ 594 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); 595 596#else 597 return U_MAX_PTR(base); 598#endif 599} 600 601/*--------------------------------------------------------------------------- 602 Platform-specific Implementations 603 Try these, and if they don't work on your platform, then special case your 604 platform with new implementations. 605 ---------------------------------------------------------------------------*/ 606 607/* Generic time zone layer -------------------------------------------------- */ 608 609/* Time zone utilities */ 610U_CAPI void U_EXPORT2 611uprv_tzset() 612{ 613#ifdef U_TZSET 614 U_TZSET(); 615#else 616 /* no initialization*/ 617#endif 618} 619 620U_CAPI int32_t U_EXPORT2 621uprv_timezone() 622{ 623#ifdef U_TIMEZONE 624 return U_TIMEZONE; 625#else 626 time_t t, t1, t2; 627 struct tm tmrec; 628#ifndef U_IOS 629 UBool dst_checked; 630#endif 631 int32_t tdiff = 0; 632 633 time(&t); 634 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); 635#ifndef U_IOS 636 dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ 637#endif 638 t1 = mktime(&tmrec); /* local time in seconds*/ 639 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); 640 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ 641 tdiff = t2 - t1; 642#ifndef U_IOS 643 /* On iOS the calculated tdiff is correct so and doesn't need this dst 644 shift applied. 
*/ 645 /* imitate NT behaviour, which returns same timezone offset to GMT for 646 winter and summer*/ 647 if (dst_checked) 648 tdiff += 3600; 649#endif 650 return tdiff; 651#endif 652} 653 654/* Note that U_TZNAME does *not* have to be tzname, but if it is, 655 some platforms need to have it declared here. */ 656 657#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN)) 658/* RS6000 and others reject char **tzname. */ 659extern U_IMPORT char *U_TZNAME[]; 660#endif 661 662#if !UCONFIG_NO_FILE_IO && ((defined(U_DARWIN) && !defined(U_IOS)) || defined(U_LINUX) || defined(U_BSD)) 663/* These platforms are likely to use Olson timezone IDs. */ 664#define CHECK_LOCALTIME_LINK 1 665#if defined(U_DARWIN) 666#include <tzfile.h> 667#define TZZONEINFO (TZDIR "/") 668#else 669#define TZDEFAULT "/etc/localtime" 670#define TZZONEINFO "/usr/share/zoneinfo/" 671#endif 672#if U_HAVE_DIRENT_H 673#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ 674/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo 675 symlinked to /etc/localtime, which makes searchForTZFile return 676 'localtime' when it's the first match. */ 677#define TZFILE_SKIP2 "localtime" 678#define SEARCH_TZFILE 679#include <dirent.h> /* Needed to search through system timezone files */ 680#endif 681static char gTimeZoneBuffer[PATH_MAX]; 682static char *gTimeZoneBufferPtr = NULL; 683#endif 684 685#ifndef U_WINDOWS 686#define isNonDigit(ch) (ch < '0' || '9' < ch) 687static UBool isValidOlsonID(const char *id) { 688 int32_t idx = 0; 689 690 /* Determine if this is something like Iceland (Olson ID) 691 or AST4ADT (non-Olson ID) */ 692 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { 693 idx++; 694 } 695 696 /* If we went through the whole string, then it might be okay. 697 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", 698 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. 699 The rest of the time it could be an Olson ID. 
George */ 700 return (UBool)(id[idx] == 0 701 || uprv_strcmp(id, "PST8PDT") == 0 702 || uprv_strcmp(id, "MST7MDT") == 0 703 || uprv_strcmp(id, "CST6CDT") == 0 704 || uprv_strcmp(id, "EST5EDT") == 0); 705} 706 707/* On some Unix-like OS, 'posix' subdirectory in 708 /usr/share/zoneinfo replicates the top-level contents. 'right' 709 subdirectory has the same set of files, but individual files 710 are different from those in the top-level directory or 'posix' 711 because 'right' has files for TAI (Int'l Atomic Time) while 'posix' 712 has files for UTC. 713 When the first match for /etc/localtime is in either of them 714 (usually in posix because 'right' has different file contents), 715 or TZ environment variable points to one of them, createTimeZone 716 fails because, say, 'posix/America/New_York' is not an Olson 717 timezone id ('America/New_York' is). So, we have to skip 718 'posix/' and 'right/' at the beginning. */ 719static void skipZoneIDPrefix(const char** id) { 720 if (uprv_strncmp(*id, "posix/", 6) == 0 721 || uprv_strncmp(*id, "right/", 6) == 0) 722 { 723 *id += 6; 724 } 725} 726#endif 727 728#if defined(U_TZNAME) && !defined(U_WINDOWS) 729 730#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) 731typedef struct OffsetZoneMapping { 732 int32_t offsetSeconds; 733 int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/ 734 const char *stdID; 735 const char *dstID; 736 const char *olsonID; 737} OffsetZoneMapping; 738 739/* 740This list tries to disambiguate a set of abbreviated timezone IDs and offsets 741and maps it to an Olson ID. 742Before adding anything to this list, take a look at 743icu/source/tools/tzcode/tz.alias 744Sometimes no daylight savings (0) is important to define due to aliases. 745This list can be tested with icu/source/test/compat/tzone.pl 746More values could be added to daylightType to increase precision. 
747*/ 748static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { 749 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, 750 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, 751 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, 752 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, 753 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, 754 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, 755 {-36000, 2, "EST", "EST", "Australia/Sydney"}, 756 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, 757 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, 758 {-34200, 2, "CST", "CST", "Australia/South"}, 759 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, 760 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, 761 {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, 762 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, 763 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, 764 {-28800, 2, "WST", "WST", "Australia/West"}, 765 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, 766 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, 767 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, 768 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, 769 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, 770 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, 771 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, 772 {-14400, 1, "AZT", "AZST", "Asia/Baku"}, 773 {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, 774 {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, 775 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, 776 {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, 777 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ 778 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, 779 {-3600, 0, "CET", "WEST", "Africa/Algiers"}, 780 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, 781 {0, 1, "GMT", "IST", "Europe/Dublin"}, 782 {0, 1, "GMT", "BST", "Europe/London"}, 783 {0, 0, "WET", "WEST", "Africa/Casablanca"}, 784 {0, 0, "WET", "WET", "Africa/El_Aaiun"}, 785 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, 786 {3600, 1, "EGT", "EGST", 
"America/Scoresbysund"}, 787 {10800, 1, "PMST", "PMDT", "America/Miquelon"}, 788 {10800, 2, "UYT", "UYST", "America/Montevideo"}, 789 {10800, 1, "WGT", "WGST", "America/Godthab"}, 790 {10800, 2, "BRT", "BRST", "Brazil/East"}, 791 {12600, 1, "NST", "NDT", "America/St_Johns"}, 792 {14400, 1, "AST", "ADT", "Canada/Atlantic"}, 793 {14400, 2, "AMT", "AMST", "America/Cuiaba"}, 794 {14400, 2, "CLT", "CLST", "Chile/Continental"}, 795 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, 796 {14400, 2, "PYT", "PYST", "America/Asuncion"}, 797 {18000, 1, "CST", "CDT", "America/Havana"}, 798 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ 799 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, 800 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, 801 {21600, 0, "CST", "CDT", "America/Guatemala"}, 802 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ 803 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ 804 {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, 805 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ 806 {32400, 1, "AKST", "AKDT", "US/Alaska"}, 807 {36000, 1, "HAST", "HADT", "US/Aleutian"} 808}; 809 810/*#define DEBUG_TZNAME*/ 811 812static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) 813{ 814 int32_t idx; 815#ifdef DEBUG_TZNAME 816 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); 817#endif 818 for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++) 819 { 820 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds 821 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType 822 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 823 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) 824 { 825 return OFFSET_ZONE_MAPPINGS[idx].olsonID; 826 } 827 } 828 return NULL; 829} 830#endif 831 832#ifdef 
SEARCH_TZFILE 833#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */ 834#define MAX_READ_SIZE 512 835 836typedef struct DefaultTZInfo { 837 char* defaultTZBuffer; 838 int64_t defaultTZFileSize; 839 FILE* defaultTZFilePtr; 840 UBool defaultTZstatus; 841 int32_t defaultTZPosition; 842} DefaultTZInfo; 843 844/* 845 * This method compares the two files given to see if they are a match. 846 * It is currently use to compare two TZ files. 847 */ 848static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { 849 FILE* file; 850 int64_t sizeFile; 851 int64_t sizeFileLeft; 852 int32_t sizeFileRead; 853 int32_t sizeFileToRead; 854 char bufferFile[MAX_READ_SIZE]; 855 UBool result = TRUE; 856 857 if (tzInfo->defaultTZFilePtr == NULL) { 858 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); 859 } 860 file = fopen(TZFileName, "r"); 861 862 tzInfo->defaultTZPosition = 0; /* reset position to begin search */ 863 864 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { 865 /* First check that the file size are equal. */ 866 if (tzInfo->defaultTZFileSize == 0) { 867 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); 868 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); 869 } 870 fseek(file, 0, SEEK_END); 871 sizeFile = ftell(file); 872 sizeFileLeft = sizeFile; 873 874 if (sizeFile != tzInfo->defaultTZFileSize) { 875 result = FALSE; 876 } else { 877 /* Store the data from the files in seperate buffers and 878 * compare each byte to determine equality. 879 */ 880 if (tzInfo->defaultTZBuffer == NULL) { 881 rewind(tzInfo->defaultTZFilePtr); 882 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); 883 fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); 884 } 885 rewind(file); 886 while(sizeFileLeft > 0) { 887 uprv_memset(bufferFile, 0, MAX_READ_SIZE); 888 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? 
sizeFileLeft : MAX_READ_SIZE; 889 890 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); 891 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { 892 result = FALSE; 893 break; 894 } 895 sizeFileLeft -= sizeFileRead; 896 tzInfo->defaultTZPosition += sizeFileRead; 897 } 898 } 899 } else { 900 result = FALSE; 901 } 902 903 if (file != NULL) { 904 fclose(file); 905 } 906 907 return result; 908} 909/* 910 * This method recursively traverses the directory given for a matching TZ file and returns the first match. 911 */ 912/* dirent also lists two entries: "." and ".." that we can safely ignore. */ 913#define SKIP1 "." 914#define SKIP2 ".." 915static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = ""; 916static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { 917 char curpath[MAX_PATH_SIZE]; 918 DIR* dirp = opendir(path); 919 DIR* subDirp = NULL; 920 struct dirent* dirEntry = NULL; 921 922 char* result = NULL; 923 if (dirp == NULL) { 924 return result; 925 } 926 927 /* Save the current path */ 928 uprv_memset(curpath, 0, MAX_PATH_SIZE); 929 uprv_strcpy(curpath, path); 930 931 /* Check each entry in the directory. */ 932 while((dirEntry = readdir(dirp)) != NULL) { 933 const char* dirName = dirEntry->d_name; 934 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) { 935 /* Create a newpath with the new entry to test each entry in the directory. */ 936 char newpath[MAX_PATH_SIZE]; 937 uprv_strcpy(newpath, curpath); 938 uprv_strcat(newpath, dirName); 939 940 if ((subDirp = opendir(newpath)) != NULL) { 941 /* If this new path is a directory, make a recursive call with the newpath. */ 942 closedir(subDirp); 943 uprv_strcat(newpath, "/"); 944 result = searchForTZFile(newpath, tzInfo); 945 /* 946 Have to get out here. Otherwise, we'd keep looking 947 and return the first match in the top-level directory 948 if there's a match in the top-level. 
If not, this function 949 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). 950 It worked without this in most cases because we have a fallback of calling 951 localtime_r to figure out the default timezone. 952 */ 953 if (result != NULL) 954 break; 955 } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { 956 if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) { 957 const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1; 958 skipZoneIDPrefix(&zoneid); 959 uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid); 960 result = SEARCH_TZFILE_RESULT; 961 /* Get out after the first one found. */ 962 break; 963 } 964 } 965 } 966 } 967 closedir(dirp); 968 return result; 969} 970#endif 971U_CAPI const char* U_EXPORT2 972uprv_tzname(int n) 973{ 974 const char *tzid = NULL; 975#ifdef U_WINDOWS 976 tzid = uprv_detectWindowsTimeZone(); 977 978 if (tzid != NULL) { 979 return tzid; 980 } 981#else 982 983/*#if defined(U_DARWIN) 984 int ret; 985 986 tzid = getenv("TZFILE"); 987 if (tzid != NULL) { 988 return tzid; 989 } 990#endif*/ 991 992/* This code can be temporarily disabled to test tzname resolution later on. */ 993#ifndef DEBUG_TZNAME 994 tzid = getenv("TZ"); 995 if (tzid != NULL && isValidOlsonID(tzid)) 996 { 997 /* This might be a good Olson ID. */ 998 skipZoneIDPrefix(&tzid); 999 return tzid; 1000 } 1001 /* else U_TZNAME will give a better result. */ 1002#endif 1003 1004#if defined(CHECK_LOCALTIME_LINK) 1005 /* Caller must handle threading issues */ 1006 if (gTimeZoneBufferPtr == NULL) { 1007 /* 1008 This is a trick to look at the name of the link to get the Olson ID 1009 because the tzfile contents is underspecified. 1010 This isn't guaranteed to work because it may not be a symlink. 
1011 */ 1012 int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)); 1013 if (0 < ret) { 1014 int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO); 1015 gTimeZoneBuffer[ret] = 0; 1016 if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0 1017 && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen)) 1018 { 1019 return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen); 1020 } 1021 } else { 1022#if defined(SEARCH_TZFILE) 1023 DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); 1024 if (tzInfo != NULL) { 1025 tzInfo->defaultTZBuffer = NULL; 1026 tzInfo->defaultTZFileSize = 0; 1027 tzInfo->defaultTZFilePtr = NULL; 1028 tzInfo->defaultTZstatus = FALSE; 1029 tzInfo->defaultTZPosition = 0; 1030 1031 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); 1032 1033 /* Free previously allocated memory */ 1034 if (tzInfo->defaultTZBuffer != NULL) { 1035 uprv_free(tzInfo->defaultTZBuffer); 1036 } 1037 if (tzInfo->defaultTZFilePtr != NULL) { 1038 fclose(tzInfo->defaultTZFilePtr); 1039 } 1040 uprv_free(tzInfo); 1041 } 1042 1043 if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { 1044 return gTimeZoneBufferPtr; 1045 } 1046#endif 1047 } 1048 } 1049 else { 1050 return gTimeZoneBufferPtr; 1051 } 1052#endif 1053#endif 1054 1055#ifdef U_TZNAME 1056#ifdef U_WINDOWS 1057 /* The return value is free'd in timezone.cpp on Windows because 1058 * the other code path returns a pointer to a heap location. */ 1059 return uprv_strdup(U_TZNAME[n]); 1060#else 1061 /* 1062 U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. 1063 So we remap the abbreviation to an olson ID. 1064 1065 Since Windows exposes a little more timezone information, 1066 we normally don't use this code on Windows because 1067 uprv_detectWindowsTimeZone should have already given the correct answer. 
1068 */ 1069 { 1070 struct tm juneSol, decemberSol; 1071 int daylightType; 1072 static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ 1073 static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ 1074 1075 /* This probing will tell us when daylight savings occurs. */ 1076 localtime_r(&juneSolstice, &juneSol); 1077 localtime_r(&decemberSolstice, &decemberSol); 1078 daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0); 1079 tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); 1080 if (tzid != NULL) { 1081 return tzid; 1082 } 1083 } 1084 return U_TZNAME[n]; 1085#endif 1086#else 1087 return ""; 1088#endif 1089} 1090 1091/* Get and set the ICU data directory --------------------------------------- */ 1092 1093static char *gDataDirectory = NULL; 1094#if U_POSIX_LOCALE 1095 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */ 1096#endif 1097 1098static UBool U_CALLCONV putil_cleanup(void) 1099{ 1100 if (gDataDirectory && *gDataDirectory) { 1101 uprv_free(gDataDirectory); 1102 } 1103 gDataDirectory = NULL; 1104#if U_POSIX_LOCALE 1105 if (gCorrectedPOSIXLocale) { 1106 uprv_free(gCorrectedPOSIXLocale); 1107 gCorrectedPOSIXLocale = NULL; 1108 } 1109#endif 1110 return TRUE; 1111} 1112 1113/* 1114 * Set the data directory. 1115 * Make a copy of the passed string, and set the global data dir to point to it. 1116 * TODO: see bug #2849, regarding thread safety. 1117 */ 1118U_CAPI void U_EXPORT2 1119u_setDataDirectory(const char *directory) { 1120 char *newDataDir; 1121 int32_t length; 1122 1123 if(directory==NULL || *directory==0) { 1124 /* A small optimization to prevent the malloc and copy when the 1125 shared library is used, and this is a way to make sure that NULL 1126 is never returned. 
1127 */ 1128 newDataDir = (char *)""; 1129 } 1130 else { 1131 length=(int32_t)uprv_strlen(directory); 1132 newDataDir = (char *)uprv_malloc(length + 2); 1133 /* Exit out if newDataDir could not be created. */ 1134 if (newDataDir == NULL) { 1135 return; 1136 } 1137 uprv_strcpy(newDataDir, directory); 1138 1139#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1140 { 1141 char *p; 1142 while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) { 1143 *p = U_FILE_SEP_CHAR; 1144 } 1145 } 1146#endif 1147 } 1148 1149 umtx_lock(NULL); 1150 if (gDataDirectory && *gDataDirectory) { 1151 uprv_free(gDataDirectory); 1152 } 1153 gDataDirectory = newDataDir; 1154 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1155 umtx_unlock(NULL); 1156} 1157 1158U_CAPI UBool U_EXPORT2 1159uprv_pathIsAbsolute(const char *path) 1160{ 1161 if(!path || !*path) { 1162 return FALSE; 1163 } 1164 1165 if(*path == U_FILE_SEP_CHAR) { 1166 return TRUE; 1167 } 1168 1169#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) 1170 if(*path == U_FILE_ALT_SEP_CHAR) { 1171 return TRUE; 1172 } 1173#endif 1174 1175#if defined(U_WINDOWS) 1176 if( (((path[0] >= 'A') && (path[0] <= 'Z')) || 1177 ((path[0] >= 'a') && (path[0] <= 'z'))) && 1178 path[1] == ':' ) { 1179 return TRUE; 1180 } 1181#endif 1182 1183 return FALSE; 1184} 1185 1186/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR 1187 until some client wrapper makefiles are updated */ 1188#if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR 1189# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1190# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" 1191# endif 1192#endif 1193 1194U_CAPI const char * U_EXPORT2 1195u_getDataDirectory(void) { 1196 const char *path = NULL; 1197#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1198 char datadir_path_buffer[PATH_MAX]; 1199#endif 1200 1201 /* if we have the directory, then return it immediately */ 1202 UMTX_CHECK(NULL, gDataDirectory, path); 1203 1204 if(path) { 1205 return path; 1206 } 1207 1208 /* 1209 When 
ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to 1210 override ICU's data with the ICU_DATA environment variable. This prevents 1211 problems where multiple custom copies of ICU's specific version of data 1212 are installed on a system. Either the application must define the data 1213 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling 1214 ICU, set the data with udata_setCommonData or trust that all of the 1215 required data is contained in ICU's data library that contains 1216 the entry point defined by U_ICUDATA_ENTRY_POINT. 1217 1218 There may also be some platforms where environment variables 1219 are not allowed. 1220 */ 1221# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO 1222 /* First try to get the environment variable */ 1223 path=getenv("ICU_DATA"); 1224# endif 1225 1226 /* ICU_DATA_DIR may be set as a compile option. 1227 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time 1228 * and is used only when data is built in archive mode eliminating the need 1229 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation 1230 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to 1231 * set their own path. 1232 */ 1233#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) 1234 if(path==NULL || *path==0) { 1235# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1236 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); 1237# endif 1238# ifdef ICU_DATA_DIR 1239 path=ICU_DATA_DIR; 1240# else 1241 path=U_ICU_DATA_DEFAULT_DIR; 1242# endif 1243# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) 1244 if (prefix != NULL) { 1245 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); 1246 path=datadir_path_buffer; 1247 } 1248# endif 1249 } 1250#endif 1251 1252 if(path==NULL) { 1253 /* It looks really bad, set it to something. 
*/ 1254 path = ""; 1255 } 1256 1257 u_setDataDirectory(path); 1258 return gDataDirectory; 1259} 1260 1261 1262 1263 1264 1265/* Macintosh-specific locale information ------------------------------------ */ 1266#ifdef XP_MAC 1267 1268typedef struct { 1269 int32_t script; 1270 int32_t region; 1271 int32_t lang; 1272 int32_t date_region; 1273 const char* posixID; 1274} mac_lc_rec; 1275 1276/* Todo: This will be updated with a newer version from www.unicode.org web 1277 page when it's available.*/ 1278#define MAC_LC_MAGIC_NUMBER -5 1279#define MAC_LC_INIT_NUMBER -9 1280 1281static const mac_lc_rec mac_lc_recs[] = { 1282 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US", 1283 /* United States*/ 1284 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR", 1285 /* France*/ 1286 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB", 1287 /* Great Britain*/ 1288 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE", 1289 /* Germany*/ 1290 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT", 1291 /* Italy*/ 1292 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL", 1293 /* Metherlands*/ 1294 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE", 1295 /* French for Belgium or Lxembourg*/ 1296 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE", 1297 /* Sweden*/ 1298 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK", 1299 /* Denmark*/ 1300 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT", 1301 /* Portugal*/ 1302 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA", 1303 /* French Canada*/ 1304 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS", 1305 /* Israel*/ 1306 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP", 1307 /* Japan*/ 1308 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 
MAC_LC_MAGIC_NUMBER, 15, "en_AU", 1309 /* Australia*/ 1310 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE", 1311 /* the Arabic world (?)*/ 1312 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI", 1313 /* Finland*/ 1314 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH", 1315 /* French for Switzerland*/ 1316 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH", 1317 /* German for Switzerland*/ 1318 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR", 1319 /* Greece*/ 1320 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS", 1321 /* Iceland ===*/ 1322 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/ 1323 /* Malta ===*/ 1324 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/ 1325 /* Cyprus ===*/ 1326 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR", 1327 /* Turkey ===*/ 1328 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU", 1329 /* Croatian system for Yugoslavia*/ 1330 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/ 1331 /* Hindi system for India*/ 1332 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/ 1333 /* Pakistan*/ 1334 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT", 1335 /* Lithuania*/ 1336 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL", 1337 /* Poland*/ 1338 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU", 1339 /* Hungary*/ 1340 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE", 1341 /* Estonia*/ 1342 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV", 1343 /* Latvia*/ 1344 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/ 1345 /* Lapland [Ask Rich for the data. 
HS]*/ 1346 /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/ 1347 /* Faeroe Islands*/ 1348 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR", 1349 /* Iran*/ 1350 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU", 1351 /* Russia*/ 1352 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE", 1353 /* Ireland*/ 1354 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR", 1355 /* Korea*/ 1356 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN", 1357 /* People's Republic of China*/ 1358 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW", 1359 /* Taiwan*/ 1360 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH", 1361 /* Thailand*/ 1362 1363 /* fallback is en_US*/ 1364 MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1365 MAC_LC_MAGIC_NUMBER, "en_US" 1366}; 1367 1368#endif 1369 1370#if U_POSIX_LOCALE 1371/* A helper function used by uprv_getPOSIXIDForDefaultLocale and 1372 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for 1373 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. 1374 */ 1375static const char *uprv_getPOSIXIDForCategory(int category) 1376{ 1377 const char* posixID = NULL; 1378 if (category == LC_MESSAGES || category == LC_CTYPE) { 1379 /* 1380 * On Solaris two different calls to setlocale can result in 1381 * different values. Only get this value once. 1382 * 1383 * We must check this first because an application can set this. 1384 * 1385 * LC_ALL can't be used because it's platform dependent. The LANG 1386 * environment variable seems to affect LC_CTYPE variable by default. 1387 * Here is what setlocale(LC_ALL, NULL) can return. 1388 * HPUX can return 'C C C C C C C' 1389 * Solaris can return /en_US/C/C/C/C/C on the second try. 1390 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... 
1391 * 1392 * The default codepage detection also needs to use LC_CTYPE. 1393 * 1394 * Do not call setlocale(LC_*, "")! Using an empty string instead 1395 * of NULL, will modify the libc behavior. 1396 */ 1397 posixID = setlocale(category, NULL); 1398 if ((posixID == 0) 1399 || (uprv_strcmp("C", posixID) == 0) 1400 || (uprv_strcmp("POSIX", posixID) == 0)) 1401 { 1402 /* Maybe we got some garbage. Try something more reasonable */ 1403 posixID = getenv("LC_ALL"); 1404 if (posixID == 0) { 1405 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); 1406 if (posixID == 0) { 1407 posixID = getenv("LANG"); 1408 } 1409 } 1410 } 1411 } 1412 if ((posixID==0) 1413 || (uprv_strcmp("C", posixID) == 0) 1414 || (uprv_strcmp("POSIX", posixID) == 0)) 1415 { 1416 /* Nothing worked. Give it a nice POSIX default value. */ 1417 posixID = "en_US_POSIX"; 1418 } 1419 return posixID; 1420} 1421 1422/* Return just the POSIX id for the default locale, whatever happens to be in 1423 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. 1424 */ 1425static const char *uprv_getPOSIXIDForDefaultLocale(void) 1426{ 1427 static const char* posixID = NULL; 1428 if (posixID == 0) { 1429 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); 1430 } 1431 return posixID; 1432} 1433 1434/* Return just the POSIX id for the default codepage, whatever happens to be in 1435 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. 1436 */ 1437static const char *uprv_getPOSIXIDForDefaultCodepage(void) 1438{ 1439 static const char* posixID = NULL; 1440 if (posixID == 0) { 1441 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); 1442 } 1443 return posixID; 1444} 1445#endif 1446 1447/* NOTE: The caller should handle thread safety */ 1448U_CAPI const char* U_EXPORT2 1449uprv_getDefaultLocaleID() 1450{ 1451#if U_POSIX_LOCALE 1452/* 1453 Note that: (a '!' 
means the ID is improper somehow) 1454 LC_ALL ----> default_loc codepage 1455-------------------------------------------------------- 1456 ab.CD ab CD 1457 ab@CD ab__CD - 1458 ab@CD.EF ab__CD EF 1459 1460 ab_CD.EF@GH ab_CD_GH EF 1461 1462Some 'improper' ways to do the same as above: 1463 ! ab_CD@GH.EF ab_CD_GH EF 1464 ! ab_CD.EF@GH.IJ ab_CD_GH EF 1465 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF 1466 1467 _CD@GH _CD_GH - 1468 _CD.EF@GH _CD_GH EF 1469 1470The variant cannot have dots in it. 1471The 'rightmost' variant (@xxx) wins. 1472The leftmost codepage (.xxx) wins. 1473*/ 1474 char *correctedPOSIXLocale = 0; 1475 const char* posixID = uprv_getPOSIXIDForDefaultLocale(); 1476 const char *p; 1477 const char *q; 1478 int32_t len; 1479 1480 /* Format: (no spaces) 1481 ll [ _CC ] [ . MM ] [ @ VV] 1482 1483 l = lang, C = ctry, M = charmap, V = variant 1484 */ 1485 1486 if (gCorrectedPOSIXLocale != NULL) { 1487 return gCorrectedPOSIXLocale; 1488 } 1489 1490 if ((p = uprv_strchr(posixID, '.')) != NULL) { 1491 /* assume new locale can't be larger than old one? */ 1492 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1493 /* Exit on memory allocation error. */ 1494 if (correctedPOSIXLocale == NULL) { 1495 return NULL; 1496 } 1497 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1498 correctedPOSIXLocale[p-posixID] = 0; 1499 1500 /* do not copy after the @ */ 1501 if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) { 1502 correctedPOSIXLocale[p-correctedPOSIXLocale] = 0; 1503 } 1504 } 1505 1506 /* Note that we scan the *uncorrected* ID. */ 1507 if ((p = uprv_strrchr(posixID, '@')) != NULL) { 1508 if (correctedPOSIXLocale == NULL) { 1509 correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1); 1510 /* Exit on memory allocation error. 
*/ 1511 if (correctedPOSIXLocale == NULL) { 1512 return NULL; 1513 } 1514 uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID); 1515 correctedPOSIXLocale[p-posixID] = 0; 1516 } 1517 p++; 1518 1519 /* Take care of any special cases here.. */ 1520 if (!uprv_strcmp(p, "nynorsk")) { 1521 p = "NY"; 1522 /* Don't worry about no__NY. In practice, it won't appear. */ 1523 } 1524 1525 if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { 1526 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ 1527 } 1528 else { 1529 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ 1530 } 1531 1532 if ((q = uprv_strchr(p, '.')) != NULL) { 1533 /* How big will the resulting string be? */ 1534 len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); 1535 uprv_strncat(correctedPOSIXLocale, p, q-p); 1536 correctedPOSIXLocale[len] = 0; 1537 } 1538 else { 1539 /* Anything following the @ sign */ 1540 uprv_strcat(correctedPOSIXLocale, p); 1541 } 1542 1543 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? 1544 * How about 'russian' -> 'ru'? 1545 * Many of the other locales using ISO codes will be handled by the 1546 * canonicalization functions in uloc_getDefault. 1547 */ 1548 } 1549 1550 /* Was a correction made? */ 1551 if (correctedPOSIXLocale != NULL) { 1552 posixID = correctedPOSIXLocale; 1553 } 1554 else { 1555 /* copy it, just in case the original pointer goes away. See j2395 */ 1556 correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1); 1557 /* Exit on memory allocation error. */ 1558 if (correctedPOSIXLocale == NULL) { 1559 return NULL; 1560 } 1561 posixID = uprv_strcpy(correctedPOSIXLocale, posixID); 1562 } 1563 1564 if (gCorrectedPOSIXLocale == NULL) { 1565 gCorrectedPOSIXLocale = correctedPOSIXLocale; 1566 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); 1567 correctedPOSIXLocale = NULL; 1568 } 1569 1570 if (correctedPOSIXLocale != NULL) { /* Was already set - clean up. 
*/ 1571 uprv_free(correctedPOSIXLocale); 1572 } 1573 1574 return posixID; 1575 1576#elif defined(U_WINDOWS) 1577 UErrorCode status = U_ZERO_ERROR; 1578 LCID id = GetThreadLocale(); 1579 const char* locID = uprv_convertToPosix(id, &status); 1580 1581 if (U_FAILURE(status)) { 1582 locID = "en_US"; 1583 } 1584 return locID; 1585 1586#elif defined(XP_MAC) 1587 int32_t script = MAC_LC_INIT_NUMBER; 1588 /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/ 1589 int32_t region = MAC_LC_INIT_NUMBER; 1590 /* = GetScriptManagerVariable(smRegionCode);*/ 1591 int32_t lang = MAC_LC_INIT_NUMBER; 1592 /* = GetScriptManagerVariable(smScriptLang);*/ 1593 int32_t date_region = MAC_LC_INIT_NUMBER; 1594 const char* posixID = 0; 1595 int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec); 1596 int32_t i; 1597 Intl1Hndl ih; 1598 1599 ih = (Intl1Hndl) GetIntlResource(1); 1600 if (ih) 1601 date_region = ((uint16_t)(*ih)->intl1Vers) >> 8; 1602 1603 for (i = 0; i < count; i++) { 1604 if ( ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER) 1605 || (mac_lc_recs[i].script == script)) 1606 && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER) 1607 || (mac_lc_recs[i].region == region)) 1608 && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER) 1609 || (mac_lc_recs[i].lang == lang)) 1610 && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER) 1611 || (mac_lc_recs[i].date_region == date_region)) 1612 ) 1613 { 1614 posixID = mac_lc_recs[i].posixID; 1615 break; 1616 } 1617 } 1618 1619 return posixID; 1620 1621#elif defined(OS400) 1622 /* locales are process scoped and are by definition thread safe */ 1623 static char correctedLocale[64]; 1624 const char *localeID = getenv("LC_ALL"); 1625 char *p; 1626 1627 if (localeID == NULL) 1628 localeID = getenv("LANG"); 1629 if (localeID == NULL) 1630 localeID = setlocale(LC_ALL, NULL); 1631 /* Make sure we have something... */ 1632 if (localeID == NULL) 1633 return "en_US_POSIX"; 1634 1635 /* Extract the locale name from the path. 
*/ 1636 if((p = uprv_strrchr(localeID, '/')) != NULL) 1637 { 1638 /* Increment p to start of locale name. */ 1639 p++; 1640 localeID = p; 1641 } 1642 1643 /* Copy to work location. */ 1644 uprv_strcpy(correctedLocale, localeID); 1645 1646 /* Strip off the '.locale' extension. */ 1647 if((p = uprv_strchr(correctedLocale, '.')) != NULL) { 1648 *p = 0; 1649 } 1650 1651 /* Upper case the locale name. */ 1652 T_CString_toUpperCase(correctedLocale); 1653 1654 /* See if we are using the POSIX locale. Any of the 1655 * following are equivalent and use the same QLGPGCMA 1656 * (POSIX) locale. 1657 * QLGPGCMA2 means UCS2 1658 * QLGPGCMA_4 means UTF-32 1659 * QLGPGCMA_8 means UTF-8 1660 */ 1661 if ((uprv_strcmp("C", correctedLocale) == 0) || 1662 (uprv_strcmp("POSIX", correctedLocale) == 0) || 1663 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) 1664 { 1665 uprv_strcpy(correctedLocale, "en_US_POSIX"); 1666 } 1667 else 1668 { 1669 int16_t LocaleLen; 1670 1671 /* Lower case the lang portion. */ 1672 for(p = correctedLocale; *p != 0 && *p != '_'; p++) 1673 { 1674 *p = uprv_tolower(*p); 1675 } 1676 1677 /* Adjust for Euro. After '_E' add 'URO'. */ 1678 LocaleLen = uprv_strlen(correctedLocale); 1679 if (correctedLocale[LocaleLen - 2] == '_' && 1680 correctedLocale[LocaleLen - 1] == 'E') 1681 { 1682 uprv_strcat(correctedLocale, "URO"); 1683 } 1684 1685 /* If using Lotus-based locale then convert to 1686 * equivalent non Lotus. 1687 */ 1688 else if (correctedLocale[LocaleLen - 2] == '_' && 1689 correctedLocale[LocaleLen - 1] == 'L') 1690 { 1691 correctedLocale[LocaleLen - 2] = 0; 1692 } 1693 1694 /* There are separate simplified and traditional 1695 * locales called zh_HK_S and zh_HK_T. 1696 */ 1697 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) 1698 { 1699 uprv_strcpy(correctedLocale, "zh_HK"); 1700 } 1701 1702 /* A special zh_CN_GBK locale... 
1703 */ 1704 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) 1705 { 1706 uprv_strcpy(correctedLocale, "zh_CN"); 1707 } 1708 1709 } 1710 1711 return correctedLocale; 1712#endif 1713 1714} 1715 1716#if !U_CHARSET_IS_UTF8 1717#if U_POSIX_LOCALE 1718/* 1719Due to various platform differences, one platform may specify a charset, 1720when they really mean a different charset. Remap the names so that they are 1721compatible with ICU. Only conflicting/ambiguous aliases should be resolved 1722here. Before adding anything to this function, please consider adding unique 1723names to the ICU alias table in the data directory. 1724*/ 1725static const char* 1726remapPlatformDependentCodepage(const char *locale, const char *name) { 1727 if (locale != NULL && *locale == 0) { 1728 /* Make sure that an empty locale is handled the same way. */ 1729 locale = NULL; 1730 } 1731 if (name == NULL) { 1732 return NULL; 1733 } 1734#if defined(U_AIX) 1735 if (uprv_strcmp(name, "IBM-943") == 0) { 1736 /* Use the ASCII compatible ibm-943 */ 1737 name = "Shift-JIS"; 1738 } 1739 else if (uprv_strcmp(name, "IBM-1252") == 0) { 1740 /* Use the windows-1252 that contains the Euro */ 1741 name = "IBM-5348"; 1742 } 1743#elif defined(U_SOLARIS) 1744 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { 1745 /* Solaris underspecifies the "EUC" name. */ 1746 if (uprv_strcmp(locale, "zh_CN") == 0) { 1747 name = "EUC-CN"; 1748 } 1749 else if (uprv_strcmp(locale, "zh_TW") == 0) { 1750 name = "EUC-TW"; 1751 } 1752 else if (uprv_strcmp(locale, "ko_KR") == 0) { 1753 name = "EUC-KR"; 1754 } 1755 } 1756 else if (uprv_strcmp(name, "eucJP") == 0) { 1757 /* 1758 ibm-954 is the best match. 1759 ibm-33722 is the default for eucJP (similar to Windows). 1760 */ 1761 name = "eucjis"; 1762 } 1763 else if (uprv_strcmp(name, "646") == 0) { 1764 /* 1765 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was 1766 * ISO-8859-1 instead of US-ASCII(646). 
1767 */ 1768 name = "ISO-8859-1"; 1769 } 1770#elif defined(U_DARWIN) 1771 if (locale == NULL && *name == 0) { 1772 /* 1773 No locale was specified, and an empty name was passed in. 1774 This usually indicates that nl_langinfo didn't return valid information. 1775 Mac OS X uses UTF-8 by default (especially the locale data and console). 1776 */ 1777 name = "UTF-8"; 1778 } 1779 else if (uprv_strcmp(name, "CP949") == 0) { 1780 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1781 name = "EUC-KR"; 1782 } 1783 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { 1784 /* 1785 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. 1786 */ 1787 name = "UTF-8"; 1788 } 1789#elif defined(U_BSD) 1790 if (uprv_strcmp(name, "CP949") == 0) { 1791 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ 1792 name = "EUC-KR"; 1793 } 1794#elif defined(U_HPUX) 1795 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { 1796 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ 1797 /* zh_TW.big5 is not the same charset as zh_HK.big5! */ 1798 name = "hkbig5"; 1799 } 1800 else if (uprv_strcmp(name, "eucJP") == 0) { 1801 /* 1802 ibm-1350 is the best match, but unavailable. 1803 ibm-954 is mostly a superset of ibm-1350. 1804 ibm-33722 is the default for eucJP (similar to Windows). 1805 */ 1806 name = "eucjis"; 1807 } 1808#elif defined(U_LINUX) 1809 if (locale != NULL && uprv_strcmp(name, "euc") == 0) { 1810 /* Linux underspecifies the "EUC" name. */ 1811 if (uprv_strcmp(locale, "korean") == 0) { 1812 name = "EUC-KR"; 1813 } 1814 else if (uprv_strcmp(locale, "japanese") == 0) { 1815 /* See comment below about eucJP */ 1816 name = "eucjis"; 1817 } 1818 } 1819 else if (uprv_strcmp(name, "eucjp") == 0) { 1820 /* 1821 ibm-1350 is the best match, but unavailable. 
/*
 * Pulls the codeset out of a POSIX locale ID of the form "locale.codeset[@variant]".
 *
 * The text after the first '.' is copied into buffer (cut at buffCapacity-1
 * characters and at the first '@'), then passed with the locale part to
 * remapPlatformDependentCodepage().
 *
 * @return that function's result, or NULL when localeName is NULL or has no '.'.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localePart[100];
    const char *dot;
    char *variantStart;
    size_t localePartSize;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        return NULL;
    }

    /* Keep everything before the '.', limited to what localePart can hold. */
    localePartSize = uprv_min(sizeof(localePart), (dot-localeName)+1);
    uprv_strncpy(localePart, localeName, localePartSize);
    localePart[localePartSize-1] = 0; /* ensure NULL termination */

    /* Everything after the '.' is the codeset, possibly followed by "@variant". */
    uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0; /* ensure NULL termination */
    variantStart = uprv_strchr(buffer, '@');
    if (variantStart != NULL) {
        *variantStart = 0;
    }

    return remapPlatformDependentCodepage(localePart, buffer);
}
(jobinfo.Coded_Char_Set_ID != 0xFFFF) { 1883 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; 1884 } 1885 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { 1886 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; 1887 } 1888 /* else use the default */ 1889 } 1890 sprintf(codepage,"ibm-%d", ccsid); 1891 return codepage; 1892 1893#elif defined(OS390) 1894 static char codepage[64]; 1895 1896 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); 1897 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); 1898 codepage[63] = 0; /* NULL terminate */ 1899 1900 return codepage; 1901 1902#elif defined(XP_MAC) 1903 return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */ 1904 1905#elif defined(U_WINDOWS) 1906 static char codepage[64]; 1907 sprintf(codepage, "windows-%d", GetACP()); 1908 return codepage; 1909 1910#elif U_POSIX_LOCALE 1911 static char codesetName[100]; 1912 const char *localeName = NULL; 1913 const char *name = NULL; 1914 1915 localeName = uprv_getPOSIXIDForDefaultCodepage(); 1916 uprv_memset(codesetName, 0, sizeof(codesetName)); 1917#if U_HAVE_NL_LANGINFO_CODESET 1918 /* When available, check nl_langinfo first because it usually gives more 1919 useful names. It depends on LC_CTYPE. 1920 nl_langinfo may use the same buffer as setlocale. */ 1921 { 1922 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); 1923#if defined(U_DARWIN) || defined(U_LINUX) 1924 /* 1925 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 1926 * instead of ASCII. 
1927 */ 1928 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { 1929 codeset = remapPlatformDependentCodepage(localeName, codeset); 1930 } else 1931#endif 1932 { 1933 codeset = remapPlatformDependentCodepage(NULL, codeset); 1934 } 1935 1936 if (codeset != NULL) { 1937 uprv_strncpy(codesetName, codeset, sizeof(codesetName)); 1938 codesetName[sizeof(codesetName)-1] = 0; 1939 return codesetName; 1940 } 1941 } 1942#endif 1943 1944 /* Use setlocale in a nice way, and then check some environment variables. 1945 Maybe the application used setlocale already. 1946 */ 1947 uprv_memset(codesetName, 0, sizeof(codesetName)); 1948 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); 1949 if (name) { 1950 /* if we can find the codeset name from setlocale, return that. */ 1951 return name; 1952 } 1953 1954 if (*codesetName == 0) 1955 { 1956 /* Everything failed. Return US ASCII (ISO 646). */ 1957 (void)uprv_strcpy(codesetName, "US-ASCII"); 1958 } 1959 return codesetName; 1960#else 1961 return "US-ASCII"; 1962#endif 1963} 1964 1965 1966U_CAPI const char* U_EXPORT2 1967uprv_getDefaultCodepage() 1968{ 1969 static char const *name = NULL; 1970 umtx_lock(NULL); 1971 if (name == NULL) { 1972 name = int_getDefaultCodepage(); 1973 } 1974 umtx_unlock(NULL); 1975 return name; 1976} 1977#endif /* !U_CHARSET_IS_UTF8 */ 1978 1979 1980/* end of platform-specific implementation -------------- */ 1981 1982/* version handling --------------------------------------------------------- */ 1983 1984U_CAPI void U_EXPORT2 1985u_versionFromString(UVersionInfo versionArray, const char *versionString) { 1986 char *end; 1987 uint16_t part=0; 1988 1989 if(versionArray==NULL) { 1990 return; 1991 } 1992 1993 if(versionString!=NULL) { 1994 for(;;) { 1995 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); 1996 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { 1997 break; 1998 } 1999 versionString=end+1; 2000 } 2001 } 2002 2003 
while(part<U_MAX_VERSION_LENGTH) { 2004 versionArray[part++]=0; 2005 } 2006} 2007 2008U_CAPI void U_EXPORT2 2009u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { 2010 if(versionArray!=NULL && versionString!=NULL) { 2011 char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; 2012 int32_t len = u_strlen(versionString); 2013 if(len>U_MAX_VERSION_STRING_LENGTH) { 2014 len = U_MAX_VERSION_STRING_LENGTH; 2015 } 2016 u_UCharsToChars(versionString, versionChars, len); 2017 versionChars[len]=0; 2018 u_versionFromString(versionArray, versionChars); 2019 } 2020} 2021 2022U_CAPI void U_EXPORT2 2023u_versionToString(UVersionInfo versionArray, char *versionString) { 2024 uint16_t count, part; 2025 uint8_t field; 2026 2027 if(versionString==NULL) { 2028 return; 2029 } 2030 2031 if(versionArray==NULL) { 2032 versionString[0]=0; 2033 return; 2034 } 2035 2036 /* count how many fields need to be written */ 2037 for(count=4; count>0 && versionArray[count-1]==0; --count) { 2038 } 2039 2040 if(count <= 1) { 2041 count = 2; 2042 } 2043 2044 /* write the first part */ 2045 /* write the decimal field value */ 2046 field=versionArray[0]; 2047 if(field>=100) { 2048 *versionString++=(char)('0'+field/100); 2049 field%=100; 2050 } 2051 if(field>=10) { 2052 *versionString++=(char)('0'+field/10); 2053 field%=10; 2054 } 2055 *versionString++=(char)('0'+field); 2056 2057 /* write the following parts */ 2058 for(part=1; part<count; ++part) { 2059 /* write a dot first */ 2060 *versionString++=U_VERSION_DELIMITER; 2061 2062 /* write the decimal field value */ 2063 field=versionArray[part]; 2064 if(field>=100) { 2065 *versionString++=(char)('0'+field/100); 2066 field%=100; 2067 } 2068 if(field>=10) { 2069 *versionString++=(char)('0'+field/10); 2070 field%=10; 2071 } 2072 *versionString++=(char)('0'+field); 2073 } 2074 2075 /* NUL-terminate */ 2076 *versionString=0; 2077} 2078 2079U_CAPI void U_EXPORT2 2080u_getVersion(UVersionInfo versionArray) { 2081 
u_versionFromString(versionArray, U_ICU_VERSION); 2082} 2083 2084/** 2085 * icucfg.h dependent code 2086 */ 2087 2088#if U_ENABLE_DYLOAD 2089 2090#if defined(U_CHECK_DYLOAD) 2091 2092#if defined(HAVE_DLOPEN) 2093 2094#ifdef HAVE_DLFCN_H 2095#ifdef __MVS__ 2096#ifndef __SUSV3 2097#define __SUSV3 1 2098#endif 2099#endif 2100#include <dlfcn.h> 2101#endif 2102 2103U_INTERNAL void * U_EXPORT2 2104uprv_dl_open(const char *libName, UErrorCode *status) { 2105 void *ret = NULL; 2106 if(U_FAILURE(*status)) return ret; 2107 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); 2108 if(ret==NULL) { 2109#ifndef U_TRACE_DYLOAD 2110 perror("dlopen"); 2111#endif 2112 *status = U_MISSING_RESOURCE_ERROR; 2113 } 2114 return ret; 2115} 2116 2117U_INTERNAL void U_EXPORT2 2118uprv_dl_close(void *lib, UErrorCode *status) { 2119 if(U_FAILURE(*status)) return; 2120 dlclose(lib); 2121} 2122 2123U_INTERNAL void* U_EXPORT2 2124uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2125 void *ret = NULL; 2126 if(U_FAILURE(*status)) return ret; 2127 ret = dlsym(lib, sym); 2128 if(ret == NULL) { 2129 *status = U_MISSING_RESOURCE_ERROR; 2130 } 2131 return ret; 2132} 2133 2134#else 2135 2136/* null (nonexistent) implementation. 
*/ 2137 2138U_INTERNAL void * U_EXPORT2 2139uprv_dl_open(const char *libName, UErrorCode *status) { 2140 if(U_FAILURE(*status)) return NULL; 2141 *status = U_UNSUPPORTED_ERROR; 2142 return NULL; 2143} 2144 2145U_INTERNAL void U_EXPORT2 2146uprv_dl_close(void *lib, UErrorCode *status) { 2147 if(U_FAILURE(*status)) return; 2148 *status = U_UNSUPPORTED_ERROR; 2149 return; 2150} 2151 2152 2153U_INTERNAL void* U_EXPORT2 2154uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2155 if(U_FAILURE(*status)) return NULL; 2156 *status = U_UNSUPPORTED_ERROR; 2157 return NULL; 2158} 2159 2160 2161 2162#endif 2163 2164#elif defined U_WINDOWS 2165 2166U_INTERNAL void * U_EXPORT2 2167uprv_dl_open(const char *libName, UErrorCode *status) { 2168 HMODULE lib = NULL; 2169 2170 if(U_FAILURE(*status)) return NULL; 2171 2172 lib = LoadLibraryA(libName); 2173 2174 if(lib==NULL) { 2175 *status = U_MISSING_RESOURCE_ERROR; 2176 } 2177 2178 return (void*)lib; 2179} 2180 2181U_INTERNAL void U_EXPORT2 2182uprv_dl_close(void *lib, UErrorCode *status) { 2183 HMODULE handle = (HMODULE)lib; 2184 if(U_FAILURE(*status)) return; 2185 2186 FreeLibrary(handle); 2187 2188 return; 2189} 2190 2191 2192U_INTERNAL void* U_EXPORT2 2193uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2194 HMODULE handle = (HMODULE)lib; 2195 void * addr = NULL; 2196 2197 if(U_FAILURE(*status) || lib==NULL) return NULL; 2198 2199 addr = GetProcAddress(handle, sym); 2200 2201 if(addr==NULL) { 2202 DWORD lastError = GetLastError(); 2203 if(lastError == ERROR_PROC_NOT_FOUND) { 2204 *status = U_MISSING_RESOURCE_ERROR; 2205 } else { 2206 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ 2207 } 2208 } 2209 2210 return addr; 2211} 2212 2213 2214#else 2215 2216/* No dynamic loading set. 
*/ 2217 2218U_INTERNAL void * U_EXPORT2 2219uprv_dl_open(const char *libName, UErrorCode *status) { 2220 if(U_FAILURE(*status)) return NULL; 2221 *status = U_UNSUPPORTED_ERROR; 2222 return NULL; 2223} 2224 2225U_INTERNAL void U_EXPORT2 2226uprv_dl_close(void *lib, UErrorCode *status) { 2227 if(U_FAILURE(*status)) return; 2228 *status = U_UNSUPPORTED_ERROR; 2229 return; 2230} 2231 2232 2233U_INTERNAL void* U_EXPORT2 2234uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) { 2235 if(U_FAILURE(*status)) return NULL; 2236 *status = U_UNSUPPORTED_ERROR; 2237 return NULL; 2238} 2239 2240 2241#endif 2242 2243#endif /* U_ENABLE_DYLOAD */ 2244 2245/* 2246 * Hey, Emacs, please set the following: 2247 * 2248 * Local Variables: 2249 * indent-tabs-mode: nil 2250 * End: 2251 * 2252 */ 2253