1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11*   Date        Name        Description
12*   04/14/97    aliu        Creation.
13*   04/24/97    aliu        Added getDefaultDataDirectory() and
14*                            getDefaultLocaleID().
15*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16*                            for assumed case.  Non-UNIX platforms must be
17*                            special-cased.  Rewrote numeric methods dealing
18*                            with NaN and Infinity to be platform independent
19*                             over all IEEE 754 platforms.
20*   05/13/97    aliu        Restored sign of timezone
21*                            (semantics are hours West of GMT)
22*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23*                             nextDouble..
24*   07/22/98    stephen     Added remainder, max, min, trunc
25*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26*   08/24/98    stephen     Added longBitsFromDouble
27*   09/08/98    stephen     Minor changes for Mac Port
28*   03/02/99    stephen     Removed openFile().  Added AS400 support.
29*                            Fixed EBCDIC tables
30*   04/15/99    stephen     Converted to C.
31*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32*   08/04/99    jeffrey R.  Added OS/2 changes
33*   11/15/99    helena      Integrated S/390 IEEE support.
34*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36*   01/03/08    Steven L.   Fake Time Support
37******************************************************************************
38*/
39
40// Defines _XOPEN_SOURCE for access to POSIX functions.
41// Must be before any other #includes.
42#include "uposixdefs.h"
43
44/* include ICU headers */
45#include "unicode/utypes.h"
46#include "unicode/putil.h"
47#include "unicode/ustring.h"
48#include "putilimp.h"
49#include "uassert.h"
50#include "umutex.h"
51#include "cmemory.h"
52#include "cstring.h"
53#include "locmap.h"
54#include "ucln_cmn.h"
55
56/* Include standard headers. */
57#include <stdio.h>
58#include <stdlib.h>
59#include <string.h>
60#include <math.h>
61#include <locale.h>
62#include <float.h>
63
64#ifndef U_COMMON_IMPLEMENTATION
65#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
66#endif
67
68
69/* include system headers */
70#if U_PLATFORM_USES_ONLY_WIN32_API
71    /*
72     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
73     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
74     * to use native APIs as much as possible?
75     */
76#   define WIN32_LEAN_AND_MEAN
77#   define VC_EXTRALEAN
78#   define NOUSER
79#   define NOSERVICE
80#   define NOIME
81#   define NOMCX
82#   include <windows.h>
83#   include "wintz.h"
84#elif U_PLATFORM == U_PF_OS400
85#   include <float.h>
86#   include <qusec.h>       /* error code structure */
87#   include <qusrjobi.h>
88#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
89#   include <mih/testptr.h> /* For uprv_maximumPtr */
90#elif U_PLATFORM == U_PF_CLASSIC_MACOS
91#   include <Files.h>
92#   include <IntlResources.h>
93#   include <Script.h>
94#   include <Folders.h>
95#   include <MacTypes.h>
96#   include <TextUtils.h>
97#   define ICU_NO_USER_DATA_OVERRIDE 1
98#elif U_PLATFORM == U_PF_OS390
99#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
100#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
101#   include <limits.h>
102#   include <unistd.h>
103#   if U_PLATFORM == U_PF_SOLARIS
104#       ifndef _XPG4_2
105#           define _XPG4_2
106#       endif
107#   endif
108#elif U_PLATFORM == U_PF_QNX
109#   include <sys/neutrino.h>
110#endif
111
112#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
113/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
114#undef __STRICT_ANSI__
115#endif
116
/*
 * Cygwin with GCC requires time.h to be included after the block above that disables strict ANSI mode.
 */
120#include <time.h>
121
122#if !U_PLATFORM_USES_ONLY_WIN32_API
123#include <sys/time.h>
124#endif
125
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
131
132#if U_HAVE_NL_LANGINFO_CODESET
133#include <langinfo.h>
134#endif
135
136/**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
/*
 * Feature switches for dlfcn.h, dlopen() and gettimeofday().
 *  - POSIX platforms other than OS/400: each switch defaults to 1 unless it
 *    was already defined before this point.
 *  - OS/400: the two dynamic-loading switches are forced to 0;
 *    HAVE_GETTIMEOFDAY still defaults to 1.
 *  - Non-POSIX platforms: all three switches are forced to 0.
 */
#if U_PLATFORM_IMPLEMENTS_POSIX
#   if U_PLATFORM == U_PF_OS400
#    define HAVE_DLFCN_H 0
#    define HAVE_DLOPEN 0
#   else
#   ifndef HAVE_DLFCN_H
#    define HAVE_DLFCN_H 1
#   endif
#   ifndef HAVE_DLOPEN
#    define HAVE_DLOPEN 1
#   endif
#   endif
#   ifndef HAVE_GETTIMEOFDAY
#    define HAVE_GETTIMEOFDAY 1
#   endif
#else
#   define HAVE_DLFCN_H 0
#   define HAVE_DLOPEN 0
#   define HAVE_GETTIMEOFDAY 0
#endif
160
/*
 * Number of elements in a real array, as an int32_t.
 * Only meaningful for true arrays: applied to a pointer, sizeof yields the
 * pointer size rather than the array size.
 */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/* Define the extension for data files, again... */
#define DATA_TYPE "dat"

/* Leave this copyright notice here! */
static const char copyright[] = U_COPYRIGHT_STRING;
168
169/* floating point implementations ------------------------------------------- */
170
/* We return QNAN rather than SNAN*/
/* Mask for the top bit of a 32-bit word; used below on the high word of a double. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a 64-bit integer and a double so a double can be built from, or inspected as, its bit pattern. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern returned by uprv_getNaN() (exponent all ones, top mantissa bit set). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern of positive infinity (exponent all ones, mantissa zero). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
181
182/*---------------------------------------------------------------------------
183  Platform utilities
184  Our general strategy is to assume we're on a POSIX platform.  Platforms which
185  are non-POSIX must declare themselves so.  The default POSIX implementation
186  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187  functions).
188  ---------------------------------------------------------------------------*/
189
/*
 * U_POSIX_LOCALE is left undefined for Win32-only, Classic Mac OS and OS/400
 * builds, and defined to 1 for every other platform.
 */
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS || U_PLATFORM == U_PF_OS400
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE    1
#endif
195
196/*
197    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199*/
200#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of the double at d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    /* Step to one past the end of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#endif
}
210
/*
 * Returns a pointer to the n least significant bytes of the double at d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    /* Step to one past the end of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
220#endif   /* !IEEE_754 */
221
222#if IEEE_754
223static UBool
224u_signBit(double d) {
225    uint8_t hiByte;
226#if U_IS_BIG_ENDIAN
227    hiByte = *(uint8_t *)&d;
228#else
229    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230#endif
231    return (hiByte & 0x80) != 0;
232}
233#endif
234
235
236
237#if defined (U_DEBUG_FAKETIME)
238/* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
240 */
241UDate fakeClock_t0 = 0; /** Time to start the clock from **/
242UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
243UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
244static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
245
246static UDate getUTCtime_real() {
247    struct timeval posixTime;
248    gettimeofday(&posixTime, NULL);
249    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
250}
251
252static UDate getUTCtime_fake() {
253    umtx_lock(&fakeClockMutex);
254    if(!fakeClock_set) {
255        UDate real = getUTCtime_real();
256        const char *fake_start = getenv("U_FAKETIME_START");
257        if((fake_start!=NULL) && (fake_start[0]!=0)) {
258            sscanf(fake_start,"%lf",&fakeClock_t0);
259            fakeClock_dt = fakeClock_t0 - real;
260            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262                    fakeClock_t0, fake_start, fakeClock_dt, real);
263        } else {
264          fakeClock_dt = 0;
265            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
267        }
268        fakeClock_set = TRUE;
269    }
270    umtx_unlock(&fakeClockMutex);
271
272    return getUTCtime_real() + fakeClock_dt;
273}
274#endif
275
276#if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Win32 FILETIME with a 64-bit integer so the timestamp can be
 * used in ordinary integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
285
286#endif
287
288/*---------------------------------------------------------------------------
289  Universal Implementations
290  These are designed to work on all platforms.  Try these, and if they
291  don't work on your platform, then special case your platform with new
292  implementations.
293---------------------------------------------------------------------------*/
294
295U_CAPI UDate U_EXPORT2
296uprv_getUTCtime()
297{
298#if defined(U_DEBUG_FAKETIME)
299    return getUTCtime_fake(); /* Hook for overriding the clock */
300#else
301    return uprv_getRawUTCtime();
302#endif
303}
304
305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306U_CAPI UDate U_EXPORT2
307uprv_getRawUTCtime()
308{
309#if U_PLATFORM == U_PF_CLASSIC_MACOS
310    time_t t, t1, t2;
311    struct tm tmrec;
312
313    uprv_memset( &tmrec, 0, sizeof(tmrec) );
314    tmrec.tm_year = 70;
315    tmrec.tm_mon = 0;
316    tmrec.tm_mday = 1;
317    t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
318
319    time(&t);
320    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
321    t2 = mktime(&tmrec);    /* seconds of current GMT*/
322    return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
323#elif U_PLATFORM_USES_ONLY_WIN32_API
324
325    FileTimeConversion winTime;
326    GetSystemTimeAsFileTime(&winTime.fileTime);
327    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
328#else
329
330#if HAVE_GETTIMEOFDAY
331    struct timeval posixTime;
332    gettimeofday(&posixTime, NULL);
333    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
334#else
335    time_t epochtime;
336    time(&epochtime);
337    return (UDate)epochtime * U_MILLIS_PER_SECOND;
338#endif
339
340#endif
341}
342
343/*-----------------------------------------------------------------------------
344  IEEE 754
345  These methods detect and return NaN and infinity values for doubles
346  conforming to IEEE 754.  Platforms which support this standard include X86,
347  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
348  If this doesn't work on your platform, you have non-IEEE floating-point, and
349  will need to code your own versions.  A naive implementation is to return 0.0
350  for getNaN and getInfinity, and false for isNaN and isInfinite.
351  ---------------------------------------------------------------------------*/
352
353U_CAPI UBool U_EXPORT2
354uprv_isNaN(double number)
355{
356#if IEEE_754
357    BitPatternConversion convertedNumber;
358    convertedNumber.d64 = number;
359    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
360    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
361
362#elif U_PLATFORM == U_PF_OS390
363    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
364                        sizeof(uint32_t));
365    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
366                        sizeof(uint32_t));
367
368    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
369      (lowBits == 0x00000000L);
370
371#else
372    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
373    /* you'll need to replace this default implementation with what's correct*/
374    /* for your platform.*/
375    return number != number;
376#endif
377}
378
379U_CAPI UBool U_EXPORT2
380uprv_isInfinite(double number)
381{
382#if IEEE_754
383    BitPatternConversion convertedNumber;
384    convertedNumber.d64 = number;
385    /* Infinity is exactly 0x7FF0000000000000U. */
386    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
387#elif U_PLATFORM == U_PF_OS390
388    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
389                        sizeof(uint32_t));
390    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
391                        sizeof(uint32_t));
392
393    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
394
395#else
396    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
397    /* value, you'll need to replace this default implementation with what's*/
398    /* correct for your platform.*/
399    return number == (2.0 * number);
400#endif
401}
402
403U_CAPI UBool U_EXPORT2
404uprv_isPositiveInfinity(double number)
405{
406#if IEEE_754 || U_PLATFORM == U_PF_OS390
407    return (UBool)(number > 0 && uprv_isInfinite(number));
408#else
409    return uprv_isInfinite(number);
410#endif
411}
412
413U_CAPI UBool U_EXPORT2
414uprv_isNegativeInfinity(double number)
415{
416#if IEEE_754 || U_PLATFORM == U_PF_OS390
417    return (UBool)(number < 0 && uprv_isInfinite(number));
418
419#else
420    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
421                        sizeof(uint32_t));
422    return((highBits & SIGN) && uprv_isInfinite(number));
423
424#endif
425}
426
427U_CAPI double U_EXPORT2
428uprv_getNaN()
429{
430#if IEEE_754 || U_PLATFORM == U_PF_OS390
431    return gNan.d64;
432#else
433    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
434    /* you'll need to replace this default implementation with what's correct*/
435    /* for your platform.*/
436    return 0.0;
437#endif
438}
439
440U_CAPI double U_EXPORT2
441uprv_getInfinity()
442{
443#if IEEE_754 || U_PLATFORM == U_PF_OS390
444    return gInf.d64;
445#else
446    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
447    /* value, you'll need to replace this default implementation with what's*/
448    /* correct for your platform.*/
449    return 0.0;
450#endif
451}
452
453U_CAPI double U_EXPORT2
454uprv_floor(double x)
455{
456    return floor(x);
457}
458
459U_CAPI double U_EXPORT2
460uprv_ceil(double x)
461{
462    return ceil(x);
463}
464
465U_CAPI double U_EXPORT2
466uprv_round(double x)
467{
468    return uprv_floor(x + 0.5);
469}
470
471U_CAPI double U_EXPORT2
472uprv_fabs(double x)
473{
474    return fabs(x);
475}
476
477U_CAPI double U_EXPORT2
478uprv_modf(double x, double* y)
479{
480    return modf(x, y);
481}
482
483U_CAPI double U_EXPORT2
484uprv_fmod(double x, double y)
485{
486    return fmod(x, y);
487}
488
489U_CAPI double U_EXPORT2
490uprv_pow(double x, double y)
491{
492    /* This is declared as "double pow(double x, double y)" */
493    return pow(x, y);
494}
495
496U_CAPI double U_EXPORT2
497uprv_pow10(int32_t x)
498{
499    return pow(10.0, (double)x);
500}
501
502U_CAPI double U_EXPORT2
503uprv_fmax(double x, double y)
504{
505#if IEEE_754
506    /* first handle NaN*/
507    if(uprv_isNaN(x) || uprv_isNaN(y))
508        return uprv_getNaN();
509
510    /* check for -0 and 0*/
511    if(x == 0.0 && y == 0.0 && u_signBit(x))
512        return y;
513
514#endif
515
516    /* this should work for all flt point w/o NaN and Inf special cases */
517    return (x > y ? x : y);
518}
519
520U_CAPI double U_EXPORT2
521uprv_fmin(double x, double y)
522{
523#if IEEE_754
524    /* first handle NaN*/
525    if(uprv_isNaN(x) || uprv_isNaN(y))
526        return uprv_getNaN();
527
528    /* check for -0 and 0*/
529    if(x == 0.0 && y == 0.0 && u_signBit(y))
530        return y;
531
532#endif
533
534    /* this should work for all flt point w/o NaN and Inf special cases */
535    return (x > y ? y : x);
536}
537
538/**
539 * Truncates the given double.
540 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
541 * This is different than calling floor() or ceil():
542 * floor(3.3) = 3, floor(-3.3) = -4
543 * ceil(3.3) = 4, ceil(-3.3) = -3
544 */
545U_CAPI double U_EXPORT2
546uprv_trunc(double d)
547{
548#if IEEE_754
549    /* handle error cases*/
550    if(uprv_isNaN(d))
551        return uprv_getNaN();
552    if(uprv_isInfinite(d))
553        return uprv_getInfinity();
554
555    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
556        return ceil(d);
557    else
558        return floor(d);
559
560#else
561    return d >= 0 ? floor(d) : ceil(d);
562
563#endif
564}
565
566/**
567 * Return the largest positive number that can be represented by an integer
568 * type of arbitrary bit length.
569 */
570U_CAPI double U_EXPORT2
571uprv_maxMantissa(void)
572{
573    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
574}
575
576U_CAPI double U_EXPORT2
577uprv_log(double d)
578{
579    return log(d);
580}
581
582U_CAPI void * U_EXPORT2
583uprv_maximumPtr(void * base)
584{
585#if U_PLATFORM == U_PF_OS400
586    /*
587     * With the provided function we should never be out of range of a given segment
588     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
589     * id and 3 bytes for the offset.  The key is that the casting takes care of
590     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
591     * seen in a program is x001000 and when casted to an int would be 0.
592     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
593     *
594     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
595     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
596     * This function determines the activation based on the pointer that is passed in and
597     * calculates the appropriate maximum available size for
598     * each pointer type (TERASPACE and non-TERASPACE)
599     *
600     * Unlike other operating systems, the pointer model isn't determined at
601     * compile time on i5/OS.
602     */
603    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
604        /* if it is a TERASPACE pointer the max is 2GB - 4k */
605        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
606    }
607    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
608    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
609
610#else
611    return U_MAX_PTR(base);
612#endif
613}
614
615/*---------------------------------------------------------------------------
616  Platform-specific Implementations
617  Try these, and if they don't work on your platform, then special case your
618  platform with new implementations.
619  ---------------------------------------------------------------------------*/
620
621/* Generic time zone layer -------------------------------------------------- */
622
623/* Time zone utilities */
624U_CAPI void U_EXPORT2
625uprv_tzset()
626{
627#if defined(U_TZSET)
628    U_TZSET();
629#else
630    /* no initialization*/
631#endif
632}
633
634U_CAPI int32_t U_EXPORT2
635uprv_timezone()
636{
637#ifdef U_TIMEZONE
638    return U_TIMEZONE;
639#else
640    time_t t, t1, t2;
641    struct tm tmrec;
642    int32_t tdiff = 0;
643
644    time(&t);
645    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
646#if U_PLATFORM != U_PF_IPHONE
647    UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
648#endif
649    t1 = mktime(&tmrec);                 /* local time in seconds*/
650    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
651    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
652    tdiff = t2 - t1;
653
654#if U_PLATFORM != U_PF_IPHONE
655    /* imitate NT behaviour, which returns same timezone offset to GMT for
656       winter and summer.
657       This does not work on all platforms. For instance, on glibc on Linux
658       and on Mac OS 10.5, tdiff calculated above remains the same
659       regardless of whether DST is in effect or not. iOS is another
660       platform where this does not work. Linux + glibc and Mac OS 10.5
661       have U_TIMEZONE defined so that this code is not reached.
662    */
663    if (dst_checked)
664        tdiff += 3600;
665#endif
666    return tdiff;
667#endif
668}
669
670/* Note that U_TZNAME does *not* have to be tzname, but if it is,
671   some platforms need to have it declared here. */
672
/* Declare the U_TZNAME array explicitly on IRIX, Darwin-based platforms and
   Cygwin builds that do not use only the Win32 API. */
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
/* RS6000 and others reject char **tzname.  */
extern U_IMPORT char *U_TZNAME[];
#endif
677
/*
 * Configuration for locating the system time zone through the file system.
 * Enabled only when file I/O is allowed, on Darwin-based (with an iPhone
 * restriction), Linux-based, BSD and Solaris platforms.
 */
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */
#define CHECK_LOCALTIME_LINK 1
#if U_PLATFORM_IS_DARWIN_BASED
/* TZDIR comes from <tzfile.h>; TZDEFAULT is presumably provided there too - TODO confirm. */
#include <tzfile.h>
#define TZZONEINFO      (TZDIR "/")
#elif U_PLATFORM == U_PF_SOLARIS
/* Solaris: default TZ file, zoneinfo directory, and a relative form of the same directory. */
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
#define TZZONEINFO2     "../usr/share/lib/zoneinfo/"
#define TZ_ENV_CHECK    "localtime"
#else
/* Linux-based and BSD: default TZ file and zoneinfo directory. */
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/zoneinfo/"
#endif
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
   symlinked to /etc/localtime, which makes searchForTZFile return
   'localtime' when it's the first match. */
#define TZFILE_SKIP2    "localtime"
/* Enables the directory-walking TZ file search code guarded by SEARCH_TZFILE. */
#define SEARCH_TZFILE
#include <dirent.h>  /* Needed to search through system timezone files */
#endif
/* File-scope storage for a time zone name/path; gTimeZoneBufferPtr starts out NULL.
   NOTE(review): presumably a cache of the detected zone - users are outside this section. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
#endif
705
706#if !U_PLATFORM_USES_ONLY_WIN32_API
707#define isNonDigit(ch) (ch < '0' || '9' < ch)
708static UBool isValidOlsonID(const char *id) {
709    int32_t idx = 0;
710
711    /* Determine if this is something like Iceland (Olson ID)
712    or AST4ADT (non-Olson ID) */
713    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
714        idx++;
715    }
716
717    /* If we went through the whole string, then it might be okay.
718    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
719    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
720    The rest of the time it could be an Olson ID. George */
721    return (UBool)(id[idx] == 0
722        || uprv_strcmp(id, "PST8PDT") == 0
723        || uprv_strcmp(id, "MST7MDT") == 0
724        || uprv_strcmp(id, "CST6CDT") == 0
725        || uprv_strcmp(id, "EST5EDT") == 0);
726}
727
728/* On some Unix-like OS, 'posix' subdirectory in
729   /usr/share/zoneinfo replicates the top-level contents. 'right'
730   subdirectory has the same set of files, but individual files
731   are different from those in the top-level directory or 'posix'
732   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
733   has files for UTC.
734   When the first match for /etc/localtime is in either of them
735   (usually in posix because 'right' has different file contents),
736   or TZ environment variable points to one of them, createTimeZone
737   fails because, say, 'posix/America/New_York' is not an Olson
738   timezone id ('America/New_York' is). So, we have to skip
739   'posix/' and 'right/' at the beginning. */
/*
 * If *id begins with "posix/" or "right/", advances *id past that prefix;
 * otherwise leaves it untouched.
 */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) != 0
        && uprv_strncmp(zone, "right/", 6) != 0)
    {
        return;
    }
    *id = zone + 6;
}
747#endif
748
749#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
750
/* Converts an offset in hours to seconds, as an int32_t.
   The argument is not parenthesized in the expansion, so pass a simple value. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps a short zone description to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds; /* offset in seconds; matched against the offset given to remapShortTimeZone() */
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;   /* standard-time abbreviation */
    const char *dstID;   /* daylight-time abbreviation */
    const char *olsonID; /* Olson ID returned on a match */
} OffsetZoneMapping;

/* Values for OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
761
/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.

Columns: offsetSeconds, daylightType (0, 1 or 2, see the U_DAYLIGHT_* enum),
stdID, dstID, olsonID.  remapShortTimeZone() returns the olsonID of the first
row whose other four columns all match.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
832
833/*#define DEBUG_TZNAME*/
834
835static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
836{
837    int32_t idx;
838#ifdef DEBUG_TZNAME
839    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
840#endif
841    for (idx = 0; idx < LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
842    {
843        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
844            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
845            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
846            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
847        {
848            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
849        }
850    }
851    return NULL;
852}
853#endif
854
855#ifdef SEARCH_TZFILE
#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
#define MAX_READ_SIZE 512      /* Chunk size, in bytes, used when reading a candidate file. */

/*
 * State about the default TZ file that compareBinaryFiles() fills in lazily
 * and reuses across calls.
 */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;     /* contents of the default TZ file; NULL until first read */
    int64_t defaultTZFileSize; /* size of the default TZ file in bytes; 0 until measured */
    FILE* defaultTZFilePtr;    /* open handle on the default TZ file; NULL until opened */
    UBool defaultTZstatus;     /* not used by compareBinaryFiles(); NOTE(review): meaning is set by callers outside this section */
    int32_t defaultTZPosition; /* compare offset into defaultTZBuffer; reset to 0 at the start of each comparison */
} DefaultTZInfo;
866
867/*
868 * This method compares the two files given to see if they are a match.
869 * It is currently use to compare two TZ files.
870 */
871static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
872    FILE* file;
873    int64_t sizeFile;
874    int64_t sizeFileLeft;
875    int32_t sizeFileRead;
876    int32_t sizeFileToRead;
877    char bufferFile[MAX_READ_SIZE];
878    UBool result = TRUE;
879
880    if (tzInfo->defaultTZFilePtr == NULL) {
881        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
882    }
883    file = fopen(TZFileName, "r");
884
885    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
886
887    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
888        /* First check that the file size are equal. */
889        if (tzInfo->defaultTZFileSize == 0) {
890            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
891            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
892        }
893        fseek(file, 0, SEEK_END);
894        sizeFile = ftell(file);
895        sizeFileLeft = sizeFile;
896
897        if (sizeFile != tzInfo->defaultTZFileSize) {
898            result = FALSE;
899        } else {
900            /* Store the data from the files in seperate buffers and
901             * compare each byte to determine equality.
902             */
903            if (tzInfo->defaultTZBuffer == NULL) {
904                rewind(tzInfo->defaultTZFilePtr);
905                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
906                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
907            }
908            rewind(file);
909            while(sizeFileLeft > 0) {
910                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
911                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
912
913                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
914                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
915                    result = FALSE;
916                    break;
917                }
918                sizeFileLeft -= sizeFileRead;
919                tzInfo->defaultTZPosition += sizeFileRead;
920            }
921        }
922    } else {
923        result = FALSE;
924    }
925
926    if (file != NULL) {
927        fclose(file);
928    }
929
930    return result;
931}
932/*
933 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
934 */
935/* dirent also lists two entries: "." and ".." that we can safely ignore. */
936#define SKIP1 "."
937#define SKIP2 ".."
938static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
939static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
940    char curpath[MAX_PATH_SIZE];
941    DIR* dirp = opendir(path);
942    DIR* subDirp = NULL;
943    struct dirent* dirEntry = NULL;
944
945    char* result = NULL;
946    if (dirp == NULL) {
947        return result;
948    }
949
950    /* Save the current path */
951    uprv_memset(curpath, 0, MAX_PATH_SIZE);
952    uprv_strcpy(curpath, path);
953
954    /* Check each entry in the directory. */
955    while((dirEntry = readdir(dirp)) != NULL) {
956        const char* dirName = dirEntry->d_name;
957        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
958            /* Create a newpath with the new entry to test each entry in the directory. */
959            char newpath[MAX_PATH_SIZE];
960            uprv_strcpy(newpath, curpath);
961            uprv_strcat(newpath, dirName);
962
963            if ((subDirp = opendir(newpath)) != NULL) {
964                /* If this new path is a directory, make a recursive call with the newpath. */
965                closedir(subDirp);
966                uprv_strcat(newpath, "/");
967                result = searchForTZFile(newpath, tzInfo);
968                /*
969                 Have to get out here. Otherwise, we'd keep looking
970                 and return the first match in the top-level directory
971                 if there's a match in the top-level. If not, this function
972                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
973                 It worked without this in most cases because we have a fallback of calling
974                 localtime_r to figure out the default timezone.
975                */
976                if (result != NULL)
977                    break;
978            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
979                if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
980                    const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
981                    skipZoneIDPrefix(&zoneid);
982                    uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
983                    result = SEARCH_TZFILE_RESULT;
984                    /* Get out after the first one found. */
985                    break;
986                }
987            }
988        }
989    }
990    closedir(dirp);
991    return result;
992}
993#endif
994U_CAPI const char* U_EXPORT2
995uprv_tzname(int n)
996{
997    const char *tzid = NULL;
998#if U_PLATFORM_USES_ONLY_WIN32_API
999    tzid = uprv_detectWindowsTimeZone();
1000
1001    if (tzid != NULL) {
1002        return tzid;
1003    }
1004#else
1005
1006/*#if U_PLATFORM_IS_DARWIN_BASED
1007    int ret;
1008
1009    tzid = getenv("TZFILE");
1010    if (tzid != NULL) {
1011        return tzid;
1012    }
1013#endif*/
1014
1015/* This code can be temporarily disabled to test tzname resolution later on. */
1016#ifndef DEBUG_TZNAME
1017    tzid = getenv("TZ");
1018    if (tzid != NULL && isValidOlsonID(tzid)
1019#if U_PLATFORM == U_PF_SOLARIS
1020    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
1021        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1022#endif
1023    ) {
1024        /* This might be a good Olson ID. */
1025        skipZoneIDPrefix(&tzid);
1026        return tzid;
1027    }
1028    /* else U_TZNAME will give a better result. */
1029#endif
1030
1031#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1032    /* Caller must handle threading issues */
1033    if (gTimeZoneBufferPtr == NULL) {
1034        /*
1035        This is a trick to look at the name of the link to get the Olson ID
1036        because the tzfile contents is underspecified.
1037        This isn't guaranteed to work because it may not be a symlink.
1038        */
1039        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1040        if (0 < ret) {
1041            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1042            gTimeZoneBuffer[ret] = 0;
1043            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1044                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1045            {
1046                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1047            }
1048#if U_PLATFORM == U_PF_SOLARIS
1049            else
1050            {
1051                tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
1052                if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
1053                                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1054                {
1055                    return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1056                }
1057            }
1058#endif
1059        } else {
1060#if defined(SEARCH_TZFILE)
1061            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1062            if (tzInfo != NULL) {
1063                tzInfo->defaultTZBuffer = NULL;
1064                tzInfo->defaultTZFileSize = 0;
1065                tzInfo->defaultTZFilePtr = NULL;
1066                tzInfo->defaultTZstatus = FALSE;
1067                tzInfo->defaultTZPosition = 0;
1068
1069                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1070
1071                /* Free previously allocated memory */
1072                if (tzInfo->defaultTZBuffer != NULL) {
1073                    uprv_free(tzInfo->defaultTZBuffer);
1074                }
1075                if (tzInfo->defaultTZFilePtr != NULL) {
1076                    fclose(tzInfo->defaultTZFilePtr);
1077                }
1078                uprv_free(tzInfo);
1079            }
1080
1081            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1082                return gTimeZoneBufferPtr;
1083            }
1084#endif
1085        }
1086    }
1087    else {
1088        return gTimeZoneBufferPtr;
1089    }
1090#endif
1091#endif
1092
1093#ifdef U_TZNAME
1094#if U_PLATFORM_USES_ONLY_WIN32_API
1095    /* The return value is free'd in timezone.cpp on Windows because
1096     * the other code path returns a pointer to a heap location. */
1097    return uprv_strdup(U_TZNAME[n]);
1098#else
1099    /*
1100    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1101    So we remap the abbreviation to an olson ID.
1102
1103    Since Windows exposes a little more timezone information,
1104    we normally don't use this code on Windows because
1105    uprv_detectWindowsTimeZone should have already given the correct answer.
1106    */
1107    {
1108        struct tm juneSol, decemberSol;
1109        int daylightType;
1110        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1111        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1112
1113        /* This probing will tell us when daylight savings occurs.  */
1114        localtime_r(&juneSolstice, &juneSol);
1115        localtime_r(&decemberSolstice, &decemberSol);
1116        if(decemberSol.tm_isdst > 0) {
1117          daylightType = U_DAYLIGHT_DECEMBER;
1118        } else if(juneSol.tm_isdst > 0) {
1119          daylightType = U_DAYLIGHT_JUNE;
1120        } else {
1121          daylightType = U_DAYLIGHT_NONE;
1122        }
1123        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1124        if (tzid != NULL) {
1125            return tzid;
1126        }
1127    }
1128    return U_TZNAME[n];
1129#endif
1130#else
1131    return "";
1132#endif
1133}
1134
1135/* Get and set the ICU data directory --------------------------------------- */
1136
/*
 * Current ICU data directory. NULL until first set; afterwards it points
 * either at the empty string literal "" (never freed) or at a heap copy made
 * by u_setDataDirectory(). Code that frees it therefore checks for a
 * non-empty string first.
 */
static char *gDataDirectory = NULL;
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
 /* Cached result of uprv_getDefaultLocaleID(); released by putil_cleanup(). */
 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
1141
1142static UBool U_CALLCONV putil_cleanup(void)
1143{
1144    if (gDataDirectory && *gDataDirectory) {
1145        uprv_free(gDataDirectory);
1146    }
1147    gDataDirectory = NULL;
1148#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1149    if (gCorrectedPOSIXLocale) {
1150        uprv_free(gCorrectedPOSIXLocale);
1151        gCorrectedPOSIXLocale = NULL;
1152    }
1153#endif
1154    return TRUE;
1155}
1156
1157/*
1158 * Set the data directory.
1159 *    Make a copy of the passed string, and set the global data dir to point to it.
1160 */
1161U_CAPI void U_EXPORT2
1162u_setDataDirectory(const char *directory) {
1163    char *newDataDir;
1164    int32_t length;
1165
1166    if(directory==NULL || *directory==0) {
1167        /* A small optimization to prevent the malloc and copy when the
1168        shared library is used, and this is a way to make sure that NULL
1169        is never returned.
1170        */
1171        newDataDir = (char *)"";
1172    }
1173    else {
1174        length=(int32_t)uprv_strlen(directory);
1175        newDataDir = (char *)uprv_malloc(length + 2);
1176        /* Exit out if newDataDir could not be created. */
1177        if (newDataDir == NULL) {
1178            return;
1179        }
1180        uprv_strcpy(newDataDir, directory);
1181
1182#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1183        {
1184            char *p;
1185            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1186                *p = U_FILE_SEP_CHAR;
1187            }
1188        }
1189#endif
1190    }
1191
1192    if (gDataDirectory && *gDataDirectory) {
1193        uprv_free(gDataDirectory);
1194    }
1195    gDataDirectory = newDataDir;
1196    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1197}
1198
1199U_CAPI UBool U_EXPORT2
1200uprv_pathIsAbsolute(const char *path)
1201{
1202  if(!path || !*path) {
1203    return FALSE;
1204  }
1205
1206  if(*path == U_FILE_SEP_CHAR) {
1207    return TRUE;
1208  }
1209
1210#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1211  if(*path == U_FILE_ALT_SEP_CHAR) {
1212    return TRUE;
1213  }
1214#endif
1215
1216#if U_PLATFORM_USES_ONLY_WIN32_API
1217  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1218       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1219      path[1] == ':' ) {
1220    return TRUE;
1221  }
1222#endif
1223
1224  return FALSE;
1225}
1226
1227/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1228   until some client wrapper makefiles are updated */
1229#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
1230# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1231#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1232# endif
1233#endif
1234
1235U_CAPI const char * U_EXPORT2
1236u_getDataDirectory(void) {
1237    const char *path = NULL;
1238#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1239    char datadir_path_buffer[PATH_MAX];
1240#endif
1241
1242    /* if we have the directory, then return it immediately */
1243    if(gDataDirectory) {
1244        return gDataDirectory;
1245    }
1246
1247    /*
1248    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1249    override ICU's data with the ICU_DATA environment variable. This prevents
1250    problems where multiple custom copies of ICU's specific version of data
1251    are installed on a system. Either the application must define the data
1252    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1253    ICU, set the data with udata_setCommonData or trust that all of the
1254    required data is contained in ICU's data library that contains
1255    the entry point defined by U_ICUDATA_ENTRY_POINT.
1256
1257    There may also be some platforms where environment variables
1258    are not allowed.
1259    */
1260#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1261    /* First try to get the environment variable */
1262    path=getenv("ICU_DATA");
1263#   endif
1264
1265    /* ICU_DATA_DIR may be set as a compile option.
1266     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1267     * and is used only when data is built in archive mode eliminating the need
1268     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1269     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1270     * set their own path.
1271     */
1272#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1273    if(path==NULL || *path==0) {
1274# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1275        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1276# endif
1277# ifdef ICU_DATA_DIR
1278        path=ICU_DATA_DIR;
1279# else
1280        path=U_ICU_DATA_DEFAULT_DIR;
1281# endif
1282# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1283        if (prefix != NULL) {
1284            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1285            path=datadir_path_buffer;
1286        }
1287# endif
1288    }
1289#endif
1290
1291    if(path==NULL) {
1292        /* It looks really bad, set it to something. */
1293        path = "";
1294    }
1295
1296    u_setDataDirectory(path);
1297    return gDataDirectory;
1298}
1299
1300
1301
1302
1303
1304/* Macintosh-specific locale information ------------------------------------ */
1305#if U_PLATFORM == U_PF_CLASSIC_MACOS
1306
/*
 * One row of the classic Mac OS locale lookup table. uprv_getDefaultLocaleID()
 * compares the four numeric fields against the system's values and returns
 * posixID of the first row where every field matches.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
/* A table field holding MAC_LC_MAGIC_NUMBER matches any value (wildcard). */
#define MAC_LC_MAGIC_NUMBER -5
/* Initial value of the lookup keys in uprv_getDefaultLocaleID(). */
#define MAC_LC_INIT_NUMBER -9

/*
 * Lookup table, searched in order. All rows wildcard script, region and lang,
 * so effectively only date_region selects the row; the last row wildcards
 * everything and acts as the fallback.
 */
static const mac_lc_rec mac_lc_recs[] = {
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US",
    /* United States*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR",
    /* France*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB",
    /* Great Britain*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE",
    /* Germany*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT",
    /* Italy*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL",
    /* Netherlands*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE",
    /* French for Belgium or Luxembourg*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE",
    /* Sweden*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK",
    /* Denmark*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT",
    /* Portugal*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA",
    /* French Canada*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "is_IS",
    /* Israel*/
    /* NOTE(review): "is_IS" is also the ID used for Iceland (row 21) below;
       an Israel row would normally map to a Hebrew locale - confirm. */
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP",
    /* Japan*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU",
    /* Australia*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE",
    /* the Arabic world (?)*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI",
    /* Finland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH",
    /* French for Switzerland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH",
    /* German for Switzerland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR",
    /* Greece*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS",
    /* Iceland ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "",*/
    /* Malta ===*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "",*/
    /* Cyprus ===*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR",
    /* Turkey ===*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU",
    /* Croatian system for Yugoslavia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "",*/
    /* Hindi system for India*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "",*/
    /* Pakistan*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT",
    /* Lithuania*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL",
    /* Poland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU",
    /* Hungary*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE",
    /* Estonia*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV",
    /* Latvia*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "",*/
    /* Lapland  [Ask Rich for the data. HS]*/
    /*MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "",*/
    /* Faeroe Islands*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR",
    /* Iran*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU",
    /* Russia*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE",
    /* Ireland*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR",
    /* Korea*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN",
    /* People's Republic of China*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW",
    /* Taiwan*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH",
    /* Thailand*/

    /* fallback is en_US*/
    MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
    MAC_LC_MAGIC_NUMBER, "en_US"
};
1406
1407#endif
1408
1409#if U_POSIX_LOCALE
1410/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1411 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1412 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1413 */
1414static const char *uprv_getPOSIXIDForCategory(int category)
1415{
1416    const char* posixID = NULL;
1417    if (category == LC_MESSAGES || category == LC_CTYPE) {
1418        /*
1419        * On Solaris two different calls to setlocale can result in
1420        * different values. Only get this value once.
1421        *
1422        * We must check this first because an application can set this.
1423        *
1424        * LC_ALL can't be used because it's platform dependent. The LANG
1425        * environment variable seems to affect LC_CTYPE variable by default.
1426        * Here is what setlocale(LC_ALL, NULL) can return.
1427        * HPUX can return 'C C C C C C C'
1428        * Solaris can return /en_US/C/C/C/C/C on the second try.
1429        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1430        *
1431        * The default codepage detection also needs to use LC_CTYPE.
1432        *
1433        * Do not call setlocale(LC_*, "")! Using an empty string instead
1434        * of NULL, will modify the libc behavior.
1435        */
1436        posixID = setlocale(category, NULL);
1437        if ((posixID == 0)
1438            || (uprv_strcmp("C", posixID) == 0)
1439            || (uprv_strcmp("POSIX", posixID) == 0))
1440        {
1441            /* Maybe we got some garbage.  Try something more reasonable */
1442            posixID = getenv("LC_ALL");
1443            if (posixID == 0) {
1444                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1445                if (posixID == 0) {
1446                    posixID = getenv("LANG");
1447                }
1448            }
1449        }
1450    }
1451    if ((posixID==0)
1452        || (uprv_strcmp("C", posixID) == 0)
1453        || (uprv_strcmp("POSIX", posixID) == 0))
1454    {
1455        /* Nothing worked.  Give it a nice POSIX default value. */
1456        posixID = "en_US_POSIX";
1457    }
1458    return posixID;
1459}
1460
1461/* Return just the POSIX id for the default locale, whatever happens to be in
1462 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1463 */
1464static const char *uprv_getPOSIXIDForDefaultLocale(void)
1465{
1466    static const char* posixID = NULL;
1467    if (posixID == 0) {
1468        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1469    }
1470    return posixID;
1471}
1472
1473#if !U_CHARSET_IS_UTF8
1474/* Return just the POSIX id for the default codepage, whatever happens to be in
1475 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1476 */
1477static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1478{
1479    static const char* posixID = NULL;
1480    if (posixID == 0) {
1481        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1482    }
1483    return posixID;
1484}
1485#endif
1486#endif
1487
1488/* NOTE: The caller should handle thread safety */
1489U_CAPI const char* U_EXPORT2
1490uprv_getDefaultLocaleID()
1491{
1492#if U_POSIX_LOCALE
1493/*
1494  Note that:  (a '!' means the ID is improper somehow)
1495     LC_ALL  ---->     default_loc          codepage
1496--------------------------------------------------------
1497     ab.CD             ab                   CD
1498     ab@CD             ab__CD               -
1499     ab@CD.EF          ab__CD               EF
1500
1501     ab_CD.EF@GH       ab_CD_GH             EF
1502
1503Some 'improper' ways to do the same as above:
1504  !  ab_CD@GH.EF       ab_CD_GH             EF
1505  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1506  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1507
1508     _CD@GH            _CD_GH               -
1509     _CD.EF@GH         _CD_GH               EF
1510
1511The variant cannot have dots in it.
1512The 'rightmost' variant (@xxx) wins.
1513The leftmost codepage (.xxx) wins.
1514*/
1515    char *correctedPOSIXLocale = 0;
1516    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1517    const char *p;
1518    const char *q;
1519    int32_t len;
1520
1521    /* Format: (no spaces)
1522    ll [ _CC ] [ . MM ] [ @ VV]
1523
1524      l = lang, C = ctry, M = charmap, V = variant
1525    */
1526
1527    if (gCorrectedPOSIXLocale != NULL) {
1528        return gCorrectedPOSIXLocale;
1529    }
1530
1531    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1532        /* assume new locale can't be larger than old one? */
1533        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1534        /* Exit on memory allocation error. */
1535        if (correctedPOSIXLocale == NULL) {
1536            return NULL;
1537        }
1538        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1539        correctedPOSIXLocale[p-posixID] = 0;
1540
1541        /* do not copy after the @ */
1542        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1543            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1544        }
1545    }
1546
1547    /* Note that we scan the *uncorrected* ID. */
1548    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1549        if (correctedPOSIXLocale == NULL) {
1550            correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1551            /* Exit on memory allocation error. */
1552            if (correctedPOSIXLocale == NULL) {
1553                return NULL;
1554            }
1555            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1556            correctedPOSIXLocale[p-posixID] = 0;
1557        }
1558        p++;
1559
1560        /* Take care of any special cases here.. */
1561        if (!uprv_strcmp(p, "nynorsk")) {
1562            p = "NY";
1563            /* Don't worry about no__NY. In practice, it won't appear. */
1564        }
1565
1566        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1567            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1568        }
1569        else {
1570            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1571        }
1572
1573        if ((q = uprv_strchr(p, '.')) != NULL) {
1574            /* How big will the resulting string be? */
1575            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1576            uprv_strncat(correctedPOSIXLocale, p, q-p);
1577            correctedPOSIXLocale[len] = 0;
1578        }
1579        else {
1580            /* Anything following the @ sign */
1581            uprv_strcat(correctedPOSIXLocale, p);
1582        }
1583
1584        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1585         * How about 'russian' -> 'ru'?
1586         * Many of the other locales using ISO codes will be handled by the
1587         * canonicalization functions in uloc_getDefault.
1588         */
1589    }
1590
1591    /* Was a correction made? */
1592    if (correctedPOSIXLocale != NULL) {
1593        posixID = correctedPOSIXLocale;
1594    }
1595    else {
1596        /* copy it, just in case the original pointer goes away.  See j2395 */
1597        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1598        /* Exit on memory allocation error. */
1599        if (correctedPOSIXLocale == NULL) {
1600            return NULL;
1601        }
1602        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1603    }
1604
1605    if (gCorrectedPOSIXLocale == NULL) {
1606        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1607        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1608        correctedPOSIXLocale = NULL;
1609    }
1610
1611    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1612        uprv_free(correctedPOSIXLocale);
1613    }
1614
1615    return posixID;
1616
1617#elif U_PLATFORM_USES_ONLY_WIN32_API
1618#define POSIX_LOCALE_CAPACITY 64
1619    UErrorCode status = U_ZERO_ERROR;
1620    char *correctedPOSIXLocale = 0;
1621
1622    if (gCorrectedPOSIXLocale != NULL) {
1623        return gCorrectedPOSIXLocale;
1624    }
1625
1626    LCID id = GetThreadLocale();
1627    correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1628    if (correctedPOSIXLocale) {
1629        int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1630        if (U_SUCCESS(status)) {
1631            *(correctedPOSIXLocale + posixLen) = 0;
1632            gCorrectedPOSIXLocale = correctedPOSIXLocale;
1633            ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1634        } else {
1635            uprv_free(correctedPOSIXLocale);
1636        }
1637    }
1638
1639    if (gCorrectedPOSIXLocale == NULL) {
1640        return "en_US";
1641    }
1642    return gCorrectedPOSIXLocale;
1643
1644#elif U_PLATFORM == U_PF_CLASSIC_MACOS
1645    int32_t script = MAC_LC_INIT_NUMBER;
1646    /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1647    int32_t region = MAC_LC_INIT_NUMBER;
1648    /* = GetScriptManagerVariable(smRegionCode);*/
1649    int32_t lang = MAC_LC_INIT_NUMBER;
1650    /* = GetScriptManagerVariable(smScriptLang);*/
1651    int32_t date_region = MAC_LC_INIT_NUMBER;
1652    const char* posixID = 0;
1653    int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1654    int32_t i;
1655    Intl1Hndl ih;
1656
1657    ih = (Intl1Hndl) GetIntlResource(1);
1658    if (ih)
1659        date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1660
1661    for (i = 0; i < count; i++) {
1662        if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1663             || (mac_lc_recs[i].script == script))
1664            && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1665             || (mac_lc_recs[i].region == region))
1666            && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1667             || (mac_lc_recs[i].lang == lang))
1668            && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1669             || (mac_lc_recs[i].date_region == date_region))
1670            )
1671        {
1672            posixID = mac_lc_recs[i].posixID;
1673            break;
1674        }
1675    }
1676
1677    return posixID;
1678
1679#elif U_PLATFORM == U_PF_OS400
1680    /* locales are process scoped and are by definition thread safe */
1681    static char correctedLocale[64];
1682    const  char *localeID = getenv("LC_ALL");
1683           char *p;
1684
1685    if (localeID == NULL)
1686        localeID = getenv("LANG");
1687    if (localeID == NULL)
1688        localeID = setlocale(LC_ALL, NULL);
1689    /* Make sure we have something... */
1690    if (localeID == NULL)
1691        return "en_US_POSIX";
1692
1693    /* Extract the locale name from the path. */
1694    if((p = uprv_strrchr(localeID, '/')) != NULL)
1695    {
1696        /* Increment p to start of locale name. */
1697        p++;
1698        localeID = p;
1699    }
1700
1701    /* Copy to work location. */
1702    uprv_strcpy(correctedLocale, localeID);
1703
1704    /* Strip off the '.locale' extension. */
1705    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1706        *p = 0;
1707    }
1708
1709    /* Upper case the locale name. */
1710    T_CString_toUpperCase(correctedLocale);
1711
1712    /* See if we are using the POSIX locale.  Any of the
1713    * following are equivalent and use the same QLGPGCMA
1714    * (POSIX) locale.
1715    * QLGPGCMA2 means UCS2
1716    * QLGPGCMA_4 means UTF-32
1717    * QLGPGCMA_8 means UTF-8
1718    */
1719    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1720        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1721        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1722    {
1723        uprv_strcpy(correctedLocale, "en_US_POSIX");
1724    }
1725    else
1726    {
1727        int16_t LocaleLen;
1728
1729        /* Lower case the lang portion. */
1730        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1731        {
1732            *p = uprv_tolower(*p);
1733        }
1734
1735        /* Adjust for Euro.  After '_E' add 'URO'. */
1736        LocaleLen = uprv_strlen(correctedLocale);
1737        if (correctedLocale[LocaleLen - 2] == '_' &&
1738            correctedLocale[LocaleLen - 1] == 'E')
1739        {
1740            uprv_strcat(correctedLocale, "URO");
1741        }
1742
1743        /* If using Lotus-based locale then convert to
1744         * equivalent non Lotus.
1745         */
1746        else if (correctedLocale[LocaleLen - 2] == '_' &&
1747            correctedLocale[LocaleLen - 1] == 'L')
1748        {
1749            correctedLocale[LocaleLen - 2] = 0;
1750        }
1751
1752        /* There are separate simplified and traditional
1753         * locales called zh_HK_S and zh_HK_T.
1754         */
1755        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1756        {
1757            uprv_strcpy(correctedLocale, "zh_HK");
1758        }
1759
1760        /* A special zh_CN_GBK locale...
1761        */
1762        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1763        {
1764            uprv_strcpy(correctedLocale, "zh_CN");
1765        }
1766
1767    }
1768
1769    return correctedLocale;
1770#endif
1771
1772}
1773
1774#if !U_CHARSET_IS_UTF8
1775#if U_POSIX_LOCALE
1776/*
1777Due to various platform differences, one platform may specify a charset,
1778when they really mean a different charset. Remap the names so that they are
1779compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1780here. Before adding anything to this function, please consider adding unique
1781names to the ICU alias table in the data directory.
1782*/
1783static const char*
1784remapPlatformDependentCodepage(const char *locale, const char *name) {
1785    if (locale != NULL && *locale == 0) {
1786        /* Make sure that an empty locale is handled the same way. */
1787        locale = NULL;
1788    }
1789    if (name == NULL) {
1790        return NULL;
1791    }
1792#if U_PLATFORM == U_PF_AIX
1793    if (uprv_strcmp(name, "IBM-943") == 0) {
1794        /* Use the ASCII compatible ibm-943 */
1795        name = "Shift-JIS";
1796    }
1797    else if (uprv_strcmp(name, "IBM-1252") == 0) {
1798        /* Use the windows-1252 that contains the Euro */
1799        name = "IBM-5348";
1800    }
1801#elif U_PLATFORM == U_PF_SOLARIS
1802    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1803        /* Solaris underspecifies the "EUC" name. */
1804        if (uprv_strcmp(locale, "zh_CN") == 0) {
1805            name = "EUC-CN";
1806        }
1807        else if (uprv_strcmp(locale, "zh_TW") == 0) {
1808            name = "EUC-TW";
1809        }
1810        else if (uprv_strcmp(locale, "ko_KR") == 0) {
1811            name = "EUC-KR";
1812        }
1813    }
1814    else if (uprv_strcmp(name, "eucJP") == 0) {
1815        /*
1816        ibm-954 is the best match.
1817        ibm-33722 is the default for eucJP (similar to Windows).
1818        */
1819        name = "eucjis";
1820    }
1821    else if (uprv_strcmp(name, "646") == 0) {
1822        /*
1823         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1824         * ISO-8859-1 instead of US-ASCII(646).
1825         */
1826        name = "ISO-8859-1";
1827    }
1828#elif U_PLATFORM_IS_DARWIN_BASED
1829    if (locale == NULL && *name == 0) {
1830        /*
1831        No locale was specified, and an empty name was passed in.
1832        This usually indicates that nl_langinfo didn't return valid information.
1833        Mac OS X uses UTF-8 by default (especially the locale data and console).
1834        */
1835        name = "UTF-8";
1836    }
1837    else if (uprv_strcmp(name, "CP949") == 0) {
1838        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1839        name = "EUC-KR";
1840    }
1841    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1842        /*
1843         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1844         */
1845        name = "UTF-8";
1846    }
1847#elif U_PLATFORM == U_PF_BSD
1848    if (uprv_strcmp(name, "CP949") == 0) {
1849        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1850        name = "EUC-KR";
1851    }
1852#elif U_PLATFORM == U_PF_HPUX
1853    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1854        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1855        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1856        name = "hkbig5";
1857    }
1858    else if (uprv_strcmp(name, "eucJP") == 0) {
1859        /*
1860        ibm-1350 is the best match, but unavailable.
1861        ibm-954 is mostly a superset of ibm-1350.
1862        ibm-33722 is the default for eucJP (similar to Windows).
1863        */
1864        name = "eucjis";
1865    }
1866#elif U_PLATFORM == U_PF_LINUX
1867    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1868        /* Linux underspecifies the "EUC" name. */
1869        if (uprv_strcmp(locale, "korean") == 0) {
1870            name = "EUC-KR";
1871        }
1872        else if (uprv_strcmp(locale, "japanese") == 0) {
1873            /* See comment below about eucJP */
1874            name = "eucjis";
1875        }
1876    }
1877    else if (uprv_strcmp(name, "eucjp") == 0) {
1878        /*
1879        ibm-1350 is the best match, but unavailable.
1880        ibm-954 is mostly a superset of ibm-1350.
1881        ibm-33722 is the default for eucJP (similar to Windows).
1882        */
1883        name = "eucjis";
1884    }
1885    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1886            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1887        /*
1888         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1889         */
1890        name = "UTF-8";
1891    }
1892    /*
1893     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1894     * it by falling back to 'US-ASCII' when NULL is returned from this
1895     * function. So, we don't have to worry about it here.
1896     */
1897#endif
1898    /* return NULL when "" is passed in */
1899    if (*name == 0) {
1900        name = NULL;
1901    }
1902    return name;
1903}
1904
1905static const char*
1906getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1907{
1908    char localeBuf[100];
1909    const char *name = NULL;
1910    char *variant = NULL;
1911
1912    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1913        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1914        uprv_strncpy(localeBuf, localeName, localeCapacity);
1915        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1916        name = uprv_strncpy(buffer, name+1, buffCapacity);
1917        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1918        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1919            *variant = 0;
1920        }
1921        name = remapPlatformDependentCodepage(localeBuf, name);
1922    }
1923    return name;
1924}
1925#endif
1926
1927static const char*
1928int_getDefaultCodepage()
1929{
1930#if U_PLATFORM == U_PF_OS400
1931    uint32_t ccsid = 37; /* Default to ibm-37 */
1932    static char codepage[64];
1933    Qwc_JOBI0400_t jobinfo;
1934    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1935
1936    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1937        "*                         ", "                ", &error);
1938
1939    if (error.Bytes_Available == 0) {
1940        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1941            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1942        }
1943        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1944            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1945        }
1946        /* else use the default */
1947    }
1948    sprintf(codepage,"ibm-%d", ccsid);
1949    return codepage;
1950
1951#elif U_PLATFORM == U_PF_OS390
1952    static char codepage[64];
1953
1954    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1955    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1956    codepage[63] = 0; /* NULL terminate */
1957
1958    return codepage;
1959
1960#elif U_PLATFORM == U_PF_CLASSIC_MACOS
1961    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */
1962
1963#elif U_PLATFORM_USES_ONLY_WIN32_API
1964    static char codepage[64];
1965    sprintf(codepage, "windows-%d", GetACP());
1966    return codepage;
1967
1968#elif U_POSIX_LOCALE
1969    static char codesetName[100];
1970    const char *localeName = NULL;
1971    const char *name = NULL;
1972
1973    localeName = uprv_getPOSIXIDForDefaultCodepage();
1974    uprv_memset(codesetName, 0, sizeof(codesetName));
1975#if U_HAVE_NL_LANGINFO_CODESET
1976    /* When available, check nl_langinfo first because it usually gives more
1977       useful names. It depends on LC_CTYPE.
1978       nl_langinfo may use the same buffer as setlocale. */
1979    {
1980        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1981#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1982        /*
1983         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1984         * instead of ASCII.
1985         */
1986        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1987            codeset = remapPlatformDependentCodepage(localeName, codeset);
1988        } else
1989#endif
1990        {
1991            codeset = remapPlatformDependentCodepage(NULL, codeset);
1992        }
1993
1994        if (codeset != NULL) {
1995            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1996            codesetName[sizeof(codesetName)-1] = 0;
1997            return codesetName;
1998        }
1999    }
2000#endif
2001
2002    /* Use setlocale in a nice way, and then check some environment variables.
2003       Maybe the application used setlocale already.
2004    */
2005    uprv_memset(codesetName, 0, sizeof(codesetName));
2006    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2007    if (name) {
2008        /* if we can find the codeset name from setlocale, return that. */
2009        return name;
2010    }
2011
2012    if (*codesetName == 0)
2013    {
2014        /* Everything failed. Return US ASCII (ISO 646). */
2015        (void)uprv_strcpy(codesetName, "US-ASCII");
2016    }
2017    return codesetName;
2018#else
2019    return "US-ASCII";
2020#endif
2021}
2022
2023
2024U_CAPI const char*  U_EXPORT2
2025uprv_getDefaultCodepage()
2026{
2027    static char const  *name = NULL;
2028    umtx_lock(NULL);
2029    if (name == NULL) {
2030        name = int_getDefaultCodepage();
2031    }
2032    umtx_unlock(NULL);
2033    return name;
2034}
2035#endif  /* !U_CHARSET_IS_UTF8 */
2036
2037
2038/* end of platform-specific implementation -------------- */
2039
2040/* version handling --------------------------------------------------------- */
2041
2042U_CAPI void U_EXPORT2
2043u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2044    char *end;
2045    uint16_t part=0;
2046
2047    if(versionArray==NULL) {
2048        return;
2049    }
2050
2051    if(versionString!=NULL) {
2052        for(;;) {
2053            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2054            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2055                break;
2056            }
2057            versionString=end+1;
2058        }
2059    }
2060
2061    while(part<U_MAX_VERSION_LENGTH) {
2062        versionArray[part++]=0;
2063    }
2064}
2065
2066U_CAPI void U_EXPORT2
2067u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2068    if(versionArray!=NULL && versionString!=NULL) {
2069        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2070        int32_t len = u_strlen(versionString);
2071        if(len>U_MAX_VERSION_STRING_LENGTH) {
2072            len = U_MAX_VERSION_STRING_LENGTH;
2073        }
2074        u_UCharsToChars(versionString, versionChars, len);
2075        versionChars[len]=0;
2076        u_versionFromString(versionArray, versionChars);
2077    }
2078}
2079
2080U_CAPI void U_EXPORT2
2081u_versionToString(const UVersionInfo versionArray, char *versionString) {
2082    uint16_t count, part;
2083    uint8_t field;
2084
2085    if(versionString==NULL) {
2086        return;
2087    }
2088
2089    if(versionArray==NULL) {
2090        versionString[0]=0;
2091        return;
2092    }
2093
2094    /* count how many fields need to be written */
2095    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2096    }
2097
2098    if(count <= 1) {
2099        count = 2;
2100    }
2101
2102    /* write the first part */
2103    /* write the decimal field value */
2104    field=versionArray[0];
2105    if(field>=100) {
2106        *versionString++=(char)('0'+field/100);
2107        field%=100;
2108    }
2109    if(field>=10) {
2110        *versionString++=(char)('0'+field/10);
2111        field%=10;
2112    }
2113    *versionString++=(char)('0'+field);
2114
2115    /* write the following parts */
2116    for(part=1; part<count; ++part) {
2117        /* write a dot first */
2118        *versionString++=U_VERSION_DELIMITER;
2119
2120        /* write the decimal field value */
2121        field=versionArray[part];
2122        if(field>=100) {
2123            *versionString++=(char)('0'+field/100);
2124            field%=100;
2125        }
2126        if(field>=10) {
2127            *versionString++=(char)('0'+field/10);
2128            field%=10;
2129        }
2130        *versionString++=(char)('0'+field);
2131    }
2132
2133    /* NUL-terminate */
2134    *versionString=0;
2135}
2136
2137U_CAPI void U_EXPORT2
2138u_getVersion(UVersionInfo versionArray) {
2139    u_versionFromString(versionArray, U_ICU_VERSION);
2140}
2141
2142/**
2143 * icucfg.h dependent code
2144 */
2145
2146#if U_ENABLE_DYLOAD
2147
2148#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2149
2150#if HAVE_DLFCN_H
2151
2152#ifdef __MVS__
2153#ifndef __SUSV3
2154#define __SUSV3 1
2155#endif
2156#endif
2157#include <dlfcn.h>
2158#endif
2159
2160U_INTERNAL void * U_EXPORT2
2161uprv_dl_open(const char *libName, UErrorCode *status) {
2162  void *ret = NULL;
2163  if(U_FAILURE(*status)) return ret;
2164  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2165  if(ret==NULL) {
2166#ifdef U_TRACE_DYLOAD
2167    printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2168#endif
2169    *status = U_MISSING_RESOURCE_ERROR;
2170  }
2171  return ret;
2172}
2173
2174U_INTERNAL void U_EXPORT2
2175uprv_dl_close(void *lib, UErrorCode *status) {
2176  if(U_FAILURE(*status)) return;
2177  dlclose(lib);
2178}
2179
2180U_INTERNAL UVoidFunction* U_EXPORT2
2181uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2182  union {
2183      UVoidFunction *fp;
2184      void *vp;
2185  } uret;
2186  uret.fp = NULL;
2187  if(U_FAILURE(*status)) return uret.fp;
2188  uret.vp = dlsym(lib, sym);
2189  if(uret.vp == NULL) {
2190#ifdef U_TRACE_DYLOAD
2191    printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2192#endif
2193    *status = U_MISSING_RESOURCE_ERROR;
2194  }
2195  return uret.fp;
2196}
2197
2198#else
2199
2200/* null (nonexistent) implementation. */
2201
2202U_INTERNAL void * U_EXPORT2
2203uprv_dl_open(const char *libName, UErrorCode *status) {
2204  if(U_FAILURE(*status)) return NULL;
2205  *status = U_UNSUPPORTED_ERROR;
2206  return NULL;
2207}
2208
2209U_INTERNAL void U_EXPORT2
2210uprv_dl_close(void *lib, UErrorCode *status) {
2211  if(U_FAILURE(*status)) return;
2212  *status = U_UNSUPPORTED_ERROR;
2213  return;
2214}
2215
2216
2217U_INTERNAL UVoidFunction* U_EXPORT2
2218uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2219  if(U_SUCCESS(*status)) {
2220    *status = U_UNSUPPORTED_ERROR;
2221  }
2222  return (UVoidFunction*)NULL;
2223}
2224
2225
2226
2227#endif
2228
2229#elif U_PLATFORM_USES_ONLY_WIN32_API
2230
2231U_INTERNAL void * U_EXPORT2
2232uprv_dl_open(const char *libName, UErrorCode *status) {
2233  HMODULE lib = NULL;
2234
2235  if(U_FAILURE(*status)) return NULL;
2236
2237  lib = LoadLibraryA(libName);
2238
2239  if(lib==NULL) {
2240    *status = U_MISSING_RESOURCE_ERROR;
2241  }
2242
2243  return (void*)lib;
2244}
2245
2246U_INTERNAL void U_EXPORT2
2247uprv_dl_close(void *lib, UErrorCode *status) {
2248  HMODULE handle = (HMODULE)lib;
2249  if(U_FAILURE(*status)) return;
2250
2251  FreeLibrary(handle);
2252
2253  return;
2254}
2255
2256
2257U_INTERNAL UVoidFunction* U_EXPORT2
2258uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2259  HMODULE handle = (HMODULE)lib;
2260  UVoidFunction* addr = NULL;
2261
2262  if(U_FAILURE(*status) || lib==NULL) return NULL;
2263
2264  addr = (UVoidFunction*)GetProcAddress(handle, sym);
2265
2266  if(addr==NULL) {
2267    DWORD lastError = GetLastError();
2268    if(lastError == ERROR_PROC_NOT_FOUND) {
2269      *status = U_MISSING_RESOURCE_ERROR;
2270    } else {
2271      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2272    }
2273  }
2274
2275  return addr;
2276}
2277
2278
2279#else
2280
2281/* No dynamic loading set. */
2282
2283U_INTERNAL void * U_EXPORT2
2284uprv_dl_open(const char *libName, UErrorCode *status) {
2285    if(U_FAILURE(*status)) return NULL;
2286    *status = U_UNSUPPORTED_ERROR;
2287    return NULL;
2288}
2289
2290U_INTERNAL void U_EXPORT2
2291uprv_dl_close(void *lib, UErrorCode *status) {
2292    if(U_FAILURE(*status)) return;
2293    *status = U_UNSUPPORTED_ERROR;
2294    return;
2295}
2296
2297
2298U_INTERNAL UVoidFunction* U_EXPORT2
2299uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2300  if(U_SUCCESS(*status)) {
2301    *status = U_UNSUPPORTED_ERROR;
2302  }
2303  return (UVoidFunction*)NULL;
2304}
2305
2306#endif /* U_ENABLE_DYLOAD */
2307
2308/*
2309 * Hey, Emacs, please set the following:
2310 *
2311 * Local Variables:
2312 * indent-tabs-mode: nil
2313 * End:
2314 *
2315 */
2316