1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11*   Date        Name        Description
12*   04/14/97    aliu        Creation.
13*   04/24/97    aliu        Added getDefaultDataDirectory() and
14*                            getDefaultLocaleID().
15*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16*                            for assumed case.  Non-UNIX platforms must be
17*                            special-cased.  Rewrote numeric methods dealing
18*                            with NaN and Infinity to be platform independent
19*                             over all IEEE 754 platforms.
20*   05/13/97    aliu        Restored sign of timezone
21*                            (semantics are hours West of GMT)
22*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23*                             nextDouble..
24*   07/22/98    stephen     Added remainder, max, min, trunc
25*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26*   08/24/98    stephen     Added longBitsFromDouble
27*   09/08/98    stephen     Minor changes for Mac Port
28*   03/02/99    stephen     Removed openFile().  Added AS400 support.
29*                            Fixed EBCDIC tables
30*   04/15/99    stephen     Converted to C.
31*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32*   08/04/99    jeffrey R.  Added OS/2 changes
33*   11/15/99    helena      Integrated S/390 IEEE support.
34*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36*   01/03/08    Steven L.   Fake Time Support
37******************************************************************************
38*/
39
40/* Define _XOPEN_SOURCE for Solaris and friends. */
41/* NetBSD needs it to be >= 4 */
42#if !defined(_XOPEN_SOURCE)
43#if __STDC_VERSION__ >= 199901L
44/* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45#define _XOPEN_SOURCE 600
46#else
47#define _XOPEN_SOURCE 4
48#endif
49#endif
50
51/* Make sure things like readlink and such functions work.
52Poorly upgraded Solaris machines can't have this defined.
53Cleanly installed Solaris can use this #define.
54*/
55#if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
56#define _XOPEN_SOURCE_EXTENDED 1
57#endif
58
59/* include ICU headers */
60#include "unicode/utypes.h"
61#include "unicode/putil.h"
62#include "unicode/ustring.h"
63#include "putilimp.h"
64#include "uassert.h"
65#include "umutex.h"
66#include "cmemory.h"
67#include "cstring.h"
68#include "locmap.h"
69#include "ucln_cmn.h"
70
71/* Include standard headers. */
72#include <stdio.h>
73#include <stdlib.h>
74#include <string.h>
75#include <math.h>
76#include <locale.h>
77#include <float.h>
78
79/* include system headers */
80#if defined(U_WINDOWS) || defined(U_MINGW)
81#   define WIN32_LEAN_AND_MEAN
82#   define VC_EXTRALEAN
83#   define NOUSER
84#   define NOSERVICE
85#   define NOIME
86#   define NOMCX
87#   include <windows.h>
88#   include "wintz.h"
89#elif defined(OS400)
90#   include <float.h>
91#   include <qusec.h>       /* error code structure */
92#   include <qusrjobi.h>
93#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
94#   include <mih/testptr.h> /* For uprv_maximumPtr */
95#elif defined(XP_MAC)
96#   include <Files.h>
97#   include <IntlResources.h>
98#   include <Script.h>
99#   include <Folders.h>
100#   include <MacTypes.h>
101#   include <TextUtils.h>
102#   define ICU_NO_USER_DATA_OVERRIDE 1
103#elif defined(OS390)
104#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
105#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
106#   include <limits.h>
107#   include <unistd.h>
108#elif defined(U_QNX)
109#   include <sys/neutrino.h>
110#elif defined(U_SOLARIS)
111#   ifndef _XPG4_2
112#       define _XPG4_2
113#   endif
114#endif
115
116#if (defined(U_CYGWIN) || defined(U_MINGW)) && defined(__STRICT_ANSI__)
117/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
118#undef __STRICT_ANSI__
119#endif
120
121/*
 * Cygwin with GCC requires time.h to be included after strict ANSI mode has been disabled above.
123 */
124#include <time.h>
125
126#if defined(U_DARWIN)
127#include <TargetConditionals.h>
128#endif
129
130#ifndef U_WINDOWS
131#include <sys/time.h>
132#endif
133
134/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
137 *
138 */
139
140#if U_HAVE_NL_LANGINFO_CODESET
141#include <langinfo.h>
142#endif
143
144/**
145 * Simple things (presence of functions, etc) should just go in configure.in and be added to
146 * icucfg.h via autoheader.
147 */
148#if defined(U_HAVE_ICUCFG)
149#include "icucfg.h"
150#endif
151
152/* Define the extension for data files, again... */
153#define DATA_TYPE "dat"
154
155/* Leave this copyright notice here! */
156static const char copyright[] = U_COPYRIGHT_STRING;
157
158/* floating point implementations ------------------------------------------- */
159
160/* We return QNAN rather than SNAN*/
161#define SIGN 0x80000000U
162
163/* Make it easy to define certain types of constants */
/* Lets a double be built from, or inspected as, its raw 64-bit pattern. */
typedef union {
    /* The integer view has to stay the first member: a C89 initializer can
       only set the first member of a union. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Quiet NaN bit pattern; this is the value handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = {
    (int64_t) INT64_C(0x7FF8000000000000)
};

/* Infinity bit pattern; this is the value handed out by uprv_getInfinity(). */
static const BitPatternConversion gInf = {
    (int64_t) INT64_C(0x7FF0000000000000)
};
170
171/*---------------------------------------------------------------------------
172  Platform utilities
173  Our general strategy is to assume we're on a POSIX platform.  Platforms which
174  are non-POSIX must declare themselves so.  The default POSIX implementation
175  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
176  functions).
177  ---------------------------------------------------------------------------*/
178
179#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400) || defined(U_MINGW)
180#   undef U_POSIX_LOCALE
181#else
182#   define U_POSIX_LOCALE    1
183#endif
184
185/*
186    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
187    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
188*/
189#if !IEEE_754
/* Returns a pointer to the n most significant bytes of *d. */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Big endian: the high-order bytes are stored first, so n is not needed. */
    return firstByte;
#else
    /* Little endian: the high-order bytes are the last n bytes of the double. */
    return firstByte + sizeof(double) - n;
#endif
}
199
/* Returns a pointer to the n least significant bytes of *d. */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Big endian: the low-order bytes are the last n bytes of the double. */
    return firstByte + sizeof(double) - n;
#else
    /* Little endian: the low-order bytes are stored first, so n is not needed. */
    return firstByte;
#endif
}
209#endif   /* !IEEE_754 */
210
211#if IEEE_754
212static UBool
213u_signBit(double d) {
214    uint8_t hiByte;
215#if U_IS_BIG_ENDIAN
216    hiByte = *(uint8_t *)&d;
217#else
218    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
219#endif
220    return (hiByte & 0x80) != 0;
221}
222#endif
223
224
225
226#if defined (U_DEBUG_FAKETIME)
227/* Override the clock to test things without having to move the system clock.
228 * Assumes POSIX gettimeofday() will function
229 */
230UDate fakeClock_t0 = 0; /** Time to start the clock from **/
231UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
232UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
233static UMTX fakeClockMutex = NULL;
234
235static UDate getUTCtime_real() {
236    struct timeval posixTime;
237    gettimeofday(&posixTime, NULL);
238    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
239}
240
241static UDate getUTCtime_fake() {
242    umtx_lock(&fakeClockMutex);
243    if(!fakeClock_set) {
244        UDate real = getUTCtime_real();
245        const char *fake_start = getenv("U_FAKETIME_START");
246        if((fake_start!=NULL) && (fake_start[0]!=0)) {
247            sscanf(fake_start,"%lf",&fakeClock_t0);
248            fakeClock_dt = fakeClock_t0 - real;
249            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
250                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
251                    fakeClock_t0, fake_start, fakeClock_dt, real);
252        } else {
253          fakeClock_dt = 0;
254            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
255                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
256        }
257        fakeClock_set = TRUE;
258    }
259    umtx_unlock(&fakeClockMutex);
260
261    return getUTCtime_real() + fakeClock_dt;
262}
263#endif
264
265#if defined(U_WINDOWS)
266typedef union {
267    int64_t int64;
268    FILETIME fileTime;
269} FileTimeConversion;   /* This is like a ULARGE_INTEGER */
270
271/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
272#define EPOCH_BIAS  INT64_C(116444736000000000)
273#define HECTONANOSECOND_PER_MILLISECOND   10000
274
275#endif
276
277/*---------------------------------------------------------------------------
278  Universal Implementations
279  These are designed to work on all platforms.  Try these, and if they
280  don't work on your platform, then special case your platform with new
281  implementations.
282---------------------------------------------------------------------------*/
283
284U_CAPI UDate U_EXPORT2
285uprv_getUTCtime()
286{
287#if defined(U_DEBUG_FAKETIME)
288    return getUTCtime_fake(); /* Hook for overriding the clock */
289#else
290    return uprv_getRawUTCtime();
291#endif
292}
293
294/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
295U_CAPI UDate U_EXPORT2
296uprv_getRawUTCtime()
297{
298#if defined(XP_MAC)
299    time_t t, t1, t2;
300    struct tm tmrec;
301
302    uprv_memset( &tmrec, 0, sizeof(tmrec) );
303    tmrec.tm_year = 70;
304    tmrec.tm_mon = 0;
305    tmrec.tm_mday = 1;
306    t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
307
308    time(&t);
309    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
310    t2 = mktime(&tmrec);    /* seconds of current GMT*/
311    return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
312#elif defined(U_WINDOWS)
313
314    FileTimeConversion winTime;
315    GetSystemTimeAsFileTime(&winTime.fileTime);
316    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
317#else
318
319#if defined(HAVE_GETTIMEOFDAY)
320    struct timeval posixTime;
321    gettimeofday(&posixTime, NULL);
322    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
323#else
324    time_t epochtime;
325    time(&epochtime);
326    return (UDate)epochtime * U_MILLIS_PER_SECOND;
327#endif
328
329#endif
330}
331
332/*-----------------------------------------------------------------------------
333  IEEE 754
334  These methods detect and return NaN and infinity values for doubles
335  conforming to IEEE 754.  Platforms which support this standard include X86,
336  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
337  If this doesn't work on your platform, you have non-IEEE floating-point, and
338  will need to code your own versions.  A naive implementation is to return 0.0
339  for getNaN and getInfinity, and false for isNaN and isInfinite.
340  ---------------------------------------------------------------------------*/
341
342U_CAPI UBool U_EXPORT2
343uprv_isNaN(double number)
344{
345#if IEEE_754
346    BitPatternConversion convertedNumber;
347    convertedNumber.d64 = number;
348    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
349    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
350
351#elif defined(OS390)
352    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
353                        sizeof(uint32_t));
354    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
355                        sizeof(uint32_t));
356
357    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
358      (lowBits == 0x00000000L);
359
360#else
361    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
362    /* you'll need to replace this default implementation with what's correct*/
363    /* for your platform.*/
364    return number != number;
365#endif
366}
367
368U_CAPI UBool U_EXPORT2
369uprv_isInfinite(double number)
370{
371#if IEEE_754
372    BitPatternConversion convertedNumber;
373    convertedNumber.d64 = number;
374    /* Infinity is exactly 0x7FF0000000000000U. */
375    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
376#elif defined(OS390)
377    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
378                        sizeof(uint32_t));
379    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
380                        sizeof(uint32_t));
381
382    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
383
384#else
385    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
386    /* value, you'll need to replace this default implementation with what's*/
387    /* correct for your platform.*/
388    return number == (2.0 * number);
389#endif
390}
391
392U_CAPI UBool U_EXPORT2
393uprv_isPositiveInfinity(double number)
394{
395#if IEEE_754 || defined(OS390)
396    return (UBool)(number > 0 && uprv_isInfinite(number));
397#else
398    return uprv_isInfinite(number);
399#endif
400}
401
402U_CAPI UBool U_EXPORT2
403uprv_isNegativeInfinity(double number)
404{
405#if IEEE_754 || defined(OS390)
406    return (UBool)(number < 0 && uprv_isInfinite(number));
407
408#else
409    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
410                        sizeof(uint32_t));
411    return((highBits & SIGN) && uprv_isInfinite(number));
412
413#endif
414}
415
416U_CAPI double U_EXPORT2
417uprv_getNaN()
418{
419#if IEEE_754 || defined(OS390)
420    return gNan.d64;
421#else
422    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
423    /* you'll need to replace this default implementation with what's correct*/
424    /* for your platform.*/
425    return 0.0;
426#endif
427}
428
429U_CAPI double U_EXPORT2
430uprv_getInfinity()
431{
432#if IEEE_754 || defined(OS390)
433    return gInf.d64;
434#else
435    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
436    /* value, you'll need to replace this default implementation with what's*/
437    /* correct for your platform.*/
438    return 0.0;
439#endif
440}
441
442U_CAPI double U_EXPORT2
443uprv_floor(double x)
444{
445    return floor(x);
446}
447
448U_CAPI double U_EXPORT2
449uprv_ceil(double x)
450{
451    return ceil(x);
452}
453
454U_CAPI double U_EXPORT2
455uprv_round(double x)
456{
457    return uprv_floor(x + 0.5);
458}
459
460U_CAPI double U_EXPORT2
461uprv_fabs(double x)
462{
463    return fabs(x);
464}
465
466U_CAPI double U_EXPORT2
467uprv_modf(double x, double* y)
468{
469    return modf(x, y);
470}
471
472U_CAPI double U_EXPORT2
473uprv_fmod(double x, double y)
474{
475    return fmod(x, y);
476}
477
478U_CAPI double U_EXPORT2
479uprv_pow(double x, double y)
480{
481    /* This is declared as "double pow(double x, double y)" */
482    return pow(x, y);
483}
484
485U_CAPI double U_EXPORT2
486uprv_pow10(int32_t x)
487{
488    return pow(10.0, (double)x);
489}
490
491U_CAPI double U_EXPORT2
492uprv_fmax(double x, double y)
493{
494#if IEEE_754
495    /* first handle NaN*/
496    if(uprv_isNaN(x) || uprv_isNaN(y))
497        return uprv_getNaN();
498
499    /* check for -0 and 0*/
500    if(x == 0.0 && y == 0.0 && u_signBit(x))
501        return y;
502
503#endif
504
505    /* this should work for all flt point w/o NaN and Inf special cases */
506    return (x > y ? x : y);
507}
508
509U_CAPI double U_EXPORT2
510uprv_fmin(double x, double y)
511{
512#if IEEE_754
513    /* first handle NaN*/
514    if(uprv_isNaN(x) || uprv_isNaN(y))
515        return uprv_getNaN();
516
517    /* check for -0 and 0*/
518    if(x == 0.0 && y == 0.0 && u_signBit(y))
519        return y;
520
521#endif
522
523    /* this should work for all flt point w/o NaN and Inf special cases */
524    return (x > y ? y : x);
525}
526
527/**
528 * Truncates the given double.
529 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
530 * This is different than calling floor() or ceil():
531 * floor(3.3) = 3, floor(-3.3) = -4
532 * ceil(3.3) = 4, ceil(-3.3) = -3
533 */
534U_CAPI double U_EXPORT2
535uprv_trunc(double d)
536{
537#if IEEE_754
538    /* handle error cases*/
539    if(uprv_isNaN(d))
540        return uprv_getNaN();
541    if(uprv_isInfinite(d))
542        return uprv_getInfinity();
543
544    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
545        return ceil(d);
546    else
547        return floor(d);
548
549#else
550    return d >= 0 ? floor(d) : ceil(d);
551
552#endif
553}
554
555/**
556 * Return the largest positive number that can be represented by an integer
557 * type of arbitrary bit length.
558 */
559U_CAPI double U_EXPORT2
560uprv_maxMantissa(void)
561{
562    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
563}
564
565U_CAPI double U_EXPORT2
566uprv_log(double d)
567{
568    return log(d);
569}
570
571U_CAPI void * U_EXPORT2
572uprv_maximumPtr(void * base)
573{
574#if defined(OS400)
575    /*
576     * With the provided function we should never be out of range of a given segment
577     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
578     * id and 3 bytes for the offset.  The key is that the casting takes care of
579     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
580     * seen in a program is x001000 and when casted to an int would be 0.
581     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
582     *
583     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
584     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
585     * This function determines the activation based on the pointer that is passed in and
586     * calculates the appropriate maximum available size for
587     * each pointer type (TERASPACE and non-TERASPACE)
588     *
589     * Unlike other operating systems, the pointer model isn't determined at
590     * compile time on i5/OS.
591     */
592    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
593        /* if it is a TERASPACE pointer the max is 2GB - 4k */
594        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
595    }
596    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
597    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
598
599#else
600    return U_MAX_PTR(base);
601#endif
602}
603
604/*---------------------------------------------------------------------------
605  Platform-specific Implementations
606  Try these, and if they don't work on your platform, then special case your
607  platform with new implementations.
608  ---------------------------------------------------------------------------*/
609
610/* Generic time zone layer -------------------------------------------------- */
611
612/* Time zone utilities */
613U_CAPI void U_EXPORT2
614uprv_tzset()
615{
616#if defined(U_TZSET)
617    U_TZSET();
618#else
619    /* no initialization*/
620#endif
621}
622
623U_CAPI int32_t U_EXPORT2
624uprv_timezone()
625{
626#ifdef U_TIMEZONE
627    return U_TIMEZONE;
628#else
629    time_t t, t1, t2;
630    struct tm tmrec;
631    UBool dst_checked;
632    int32_t tdiff = 0;
633
634    time(&t);
635    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
636    dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
637    t1 = mktime(&tmrec);                 /* local time in seconds*/
638    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
639    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
640    tdiff = t2 - t1;
641    /* imitate NT behaviour, which returns same timezone offset to GMT for
642       winter and summer*/
643    if (dst_checked)
644        tdiff += 3600;
645    return tdiff;
646#endif
647}
648
649/* Note that U_TZNAME does *not* have to be tzname, but if it is,
650   some platforms need to have it declared here. */
651
652#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
653/* RS6000 and others reject char **tzname.  */
654extern U_IMPORT char *U_TZNAME[];
655#endif
656
657#if !UCONFIG_NO_FILE_IO && (defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD))
658/* These platforms are likely to use Olson timezone IDs. */
659#define CHECK_LOCALTIME_LINK 1
660#if defined(U_DARWIN)
661#include <tzfile.h>
662#define TZZONEINFO      (TZDIR "/")
663#else
664#define TZDEFAULT       "/etc/localtime"
665#define TZZONEINFO      "/usr/share/zoneinfo/"
666#endif
667#if U_HAVE_DIRENT_H
668#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
669/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
670   symlinked to /etc/localtime, which makes searchForTZFile return
671   'localtime' when it's the first match. */
672#define TZFILE_SKIP2    "localtime"
673#define SEARCH_TZFILE
674#include <dirent.h>  /* Needed to search through system timezone files */
675#endif
676static char gTimeZoneBuffer[PATH_MAX];
677static char *gTimeZoneBufferPtr = NULL;
678#endif
679
680#ifndef U_WINDOWS
681#define isNonDigit(ch) (ch < '0' || '9' < ch)
682static UBool isValidOlsonID(const char *id) {
683    int32_t idx = 0;
684
685    /* Determine if this is something like Iceland (Olson ID)
686    or AST4ADT (non-Olson ID) */
687    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
688        idx++;
689    }
690
691    /* If we went through the whole string, then it might be okay.
692    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
693    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
694    The rest of the time it could be an Olson ID. George */
695    return (UBool)(id[idx] == 0
696        || uprv_strcmp(id, "PST8PDT") == 0
697        || uprv_strcmp(id, "MST7MDT") == 0
698        || uprv_strcmp(id, "CST6CDT") == 0
699        || uprv_strcmp(id, "EST5EDT") == 0);
700}
701
702/* On some Unix-like OS, 'posix' subdirectory in
703   /usr/share/zoneinfo replicates the top-level contents. 'right'
704   subdirectory has the same set of files, but individual files
705   are different from those in the top-level directory or 'posix'
706   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
707   has files for UTC.
708   When the first match for /etc/localtime is in either of them
709   (usually in posix because 'right' has different file contents),
710   or TZ environment variable points to one of them, createTimeZone
711   fails because, say, 'posix/America/New_York' is not an Olson
712   timezone id ('America/New_York' is). So, we have to skip
713   'posix/' and 'right/' at the beginning. */
/* Advances *id past a leading "posix/" or "right/"; any other string is left
   untouched. */
static void skipZoneIDPrefix(const char** id) {
    const char *zone = *id;

    if (uprv_strncmp(zone, "posix/", 6) != 0
        && uprv_strncmp(zone, "right/", 6) != 0)
    {
        return;
    }
    *id = zone + 6;
}
721#endif
722
723#if defined(U_TZNAME) && !defined(U_WINDOWS)
724
/* Converts an offset in hours to whole seconds.  The argument is
   parenthesized so that an expression such as (1 + 1) is multiplied as a
   whole instead of only its last term. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)((offset)*3600)

/* One row of the table that maps abbreviated zone names plus an offset to an
   Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;    /* abbreviation used for standard time */
    const char *dstID;    /* abbreviation used for daylight time */
    const char *olsonID;  /* Olson ID this combination maps to */
} OffsetZoneMapping;
733
734/*
735This list tries to disambiguate a set of abbreviated timezone IDs and offsets
736and maps it to an Olson ID.
737Before adding anything to this list, take a look at
738icu/source/tools/tzcode/tz.alias
739Sometimes no daylight savings (0) is important to define due to aliases.
740This list can be tested with icu/source/test/compat/tzone.pl
741More values could be added to daylightType to increase precision.
742*/
743static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
744    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
745    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
746    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
747    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
748    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
749    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
750    {-36000, 2, "EST", "EST", "Australia/Sydney"},
751    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
752    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
753    {-34200, 2, "CST", "CST", "Australia/South"},
754    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
755    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
756    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
757    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
758    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
759    {-28800, 2, "WST", "WST", "Australia/West"},
760    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
761    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
762    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
763    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
764    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
765    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
766    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
767    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
768    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
769    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
770    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
771    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
772    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
773    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
774    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
775    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
776    {0, 1, "GMT", "IST", "Europe/Dublin"},
777    {0, 1, "GMT", "BST", "Europe/London"},
778    {0, 0, "WET", "WEST", "Africa/Casablanca"},
779    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
780    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
781    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
782    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
783    {10800, 2, "UYT", "UYST", "America/Montevideo"},
784    {10800, 1, "WGT", "WGST", "America/Godthab"},
785    {10800, 2, "BRT", "BRST", "Brazil/East"},
786    {12600, 1, "NST", "NDT", "America/St_Johns"},
787    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
788    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
789    {14400, 2, "CLT", "CLST", "Chile/Continental"},
790    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
791    {14400, 2, "PYT", "PYST", "America/Asuncion"},
792    {18000, 1, "CST", "CDT", "America/Havana"},
793    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
794    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
795    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
796    {21600, 0, "CST", "CDT", "America/Guatemala"},
797    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
798    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
799    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
800    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
801    {32400, 1, "AKST", "AKDT", "US/Alaska"},
802    {36000, 1, "HAST", "HADT", "US/Aleutian"}
803};
804
805/*#define DEBUG_TZNAME*/
806
807static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
808{
809    int32_t idx;
810#ifdef DEBUG_TZNAME
811    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
812#endif
813    for (idx = 0; idx < (int32_t)sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]); idx++)
814    {
815        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
816            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
817            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
818            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
819        {
820            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
821        }
822    }
823    return NULL;
824}
825#endif
826
827#ifdef SEARCH_TZFILE
828#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
829#define MAX_READ_SIZE 512
830
831typedef struct DefaultTZInfo {
832    char* defaultTZBuffer;
833    int64_t defaultTZFileSize;
834    FILE* defaultTZFilePtr;
835    UBool defaultTZstatus;
836    int32_t defaultTZPosition;
837} DefaultTZInfo;
838
839/*
840 * This method compares the two files given to see if they are a match.
841 * It is currently use to compare two TZ files.
842 */
843static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
844    FILE* file;
845    int64_t sizeFile;
846    int64_t sizeFileLeft;
847    int32_t sizeFileRead;
848    int32_t sizeFileToRead;
849    char bufferFile[MAX_READ_SIZE];
850    UBool result = TRUE;
851
852    if (tzInfo->defaultTZFilePtr == NULL) {
853        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
854    }
855    file = fopen(TZFileName, "r");
856
857    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
858
859    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
860        /* First check that the file size are equal. */
861        if (tzInfo->defaultTZFileSize == 0) {
862            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
863            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
864        }
865        fseek(file, 0, SEEK_END);
866        sizeFile = ftell(file);
867        sizeFileLeft = sizeFile;
868
869        if (sizeFile != tzInfo->defaultTZFileSize) {
870            result = FALSE;
871        } else {
872            /* Store the data from the files in seperate buffers and
873             * compare each byte to determine equality.
874             */
875            if (tzInfo->defaultTZBuffer == NULL) {
876                rewind(tzInfo->defaultTZFilePtr);
877                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
878                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
879            }
880            rewind(file);
881            while(sizeFileLeft > 0) {
882                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
883                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
884
885                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
886                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
887                    result = FALSE;
888                    break;
889                }
890                sizeFileLeft -= sizeFileRead;
891                tzInfo->defaultTZPosition += sizeFileRead;
892            }
893        }
894    } else {
895        result = FALSE;
896    }
897
898    if (file != NULL) {
899        fclose(file);
900    }
901
902    return result;
903}
904/*
905 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
906 */
907/* dirent also lists two entries: "." and ".." that we can safely ignore. */
908#define SKIP1 "."
909#define SKIP2 ".."
910static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
911static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
912    char curpath[MAX_PATH_SIZE];
913    DIR* dirp = opendir(path);
914    DIR* subDirp = NULL;
915    struct dirent* dirEntry = NULL;
916
917    char* result = NULL;
918    if (dirp == NULL) {
919        return result;
920    }
921
922    /* Save the current path */
923    uprv_memset(curpath, 0, MAX_PATH_SIZE);
924    uprv_strcpy(curpath, path);
925
926    /* Check each entry in the directory. */
927    while((dirEntry = readdir(dirp)) != NULL) {
928        const char* dirName = dirEntry->d_name;
929        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
930            /* Create a newpath with the new entry to test each entry in the directory. */
931            char newpath[MAX_PATH_SIZE];
932            uprv_strcpy(newpath, curpath);
933            uprv_strcat(newpath, dirName);
934
935            if ((subDirp = opendir(newpath)) != NULL) {
936                /* If this new path is a directory, make a recursive call with the newpath. */
937                closedir(subDirp);
938                uprv_strcat(newpath, "/");
939                result = searchForTZFile(newpath, tzInfo);
940                /*
941                 Have to get out here. Otherwise, we'd keep looking
942                 and return the first match in the top-level directory
943                 if there's a match in the top-level. If not, this function
944                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
945                 It worked without this in most cases because we have a fallback of calling
946                 localtime_r to figure out the default timezone.
947                */
948                if (result != NULL)
949                    break;
950            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
951                if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
952                    const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
953                    skipZoneIDPrefix(&zoneid);
954                    uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
955                    result = SEARCH_TZFILE_RESULT;
956                    /* Get out after the first one found. */
957                    break;
958                }
959            }
960        }
961    }
962    closedir(dirp);
963    return result;
964}
965#endif
966U_CAPI const char* U_EXPORT2
967uprv_tzname(int n)
968{
969    const char *tzid = NULL;
970#ifdef U_WINDOWS
971    tzid = uprv_detectWindowsTimeZone();
972
973    if (tzid != NULL) {
974        return tzid;
975    }
976#else
977
978/*#if defined(U_DARWIN)
979    int ret;
980
981    tzid = getenv("TZFILE");
982    if (tzid != NULL) {
983        return tzid;
984    }
985#endif*/
986
987/* This code can be temporarily disabled to test tzname resolution later on. */
988#ifndef DEBUG_TZNAME
989    tzid = getenv("TZ");
990    if (tzid != NULL && isValidOlsonID(tzid))
991    {
992        /* This might be a good Olson ID. */
993        skipZoneIDPrefix(&tzid);
994        return tzid;
995    }
996    /* else U_TZNAME will give a better result. */
997#endif
998
999#if defined(CHECK_LOCALTIME_LINK)
1000    /* Caller must handle threading issues */
1001    if (gTimeZoneBufferPtr == NULL) {
1002        /*
1003        This is a trick to look at the name of the link to get the Olson ID
1004        because the tzfile contents is underspecified.
1005        This isn't guaranteed to work because it may not be a symlink.
1006        */
1007        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1008        if (0 < ret) {
1009            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1010            gTimeZoneBuffer[ret] = 0;
1011            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1012                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1013            {
1014                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1015            }
1016        } else {
1017#if defined(SEARCH_TZFILE)
1018            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1019            if (tzInfo != NULL) {
1020                tzInfo->defaultTZBuffer = NULL;
1021                tzInfo->defaultTZFileSize = 0;
1022                tzInfo->defaultTZFilePtr = NULL;
1023                tzInfo->defaultTZstatus = FALSE;
1024                tzInfo->defaultTZPosition = 0;
1025
1026                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1027
1028                /* Free previously allocated memory */
1029                if (tzInfo->defaultTZBuffer != NULL) {
1030                    uprv_free(tzInfo->defaultTZBuffer);
1031                }
1032                if (tzInfo->defaultTZFilePtr != NULL) {
1033                    fclose(tzInfo->defaultTZFilePtr);
1034                }
1035                uprv_free(tzInfo);
1036            }
1037
1038            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1039                return gTimeZoneBufferPtr;
1040            }
1041#endif
1042        }
1043    }
1044    else {
1045        return gTimeZoneBufferPtr;
1046    }
1047#endif
1048#endif
1049
1050#ifdef U_TZNAME
1051#if defined(U_WINDOWS) || defined(U_MINGW)
1052    /* The return value is free'd in timezone.cpp on Windows because
1053     * the other code path returns a pointer to a heap location. */
1054    return uprv_strdup(U_TZNAME[n]);
1055#else
1056    /*
1057    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1058    So we remap the abbreviation to an olson ID.
1059
1060    Since Windows exposes a little more timezone information,
1061    we normally don't use this code on Windows because
1062    uprv_detectWindowsTimeZone should have already given the correct answer.
1063    */
1064    {
1065        struct tm juneSol, decemberSol;
1066        int daylightType;
1067        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1068        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1069
1070        /* This probing will tell us when daylight savings occurs.  */
1071        localtime_r(&juneSolstice, &juneSol);
1072        localtime_r(&decemberSolstice, &decemberSol);
1073        daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1074        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1075        if (tzid != NULL) {
1076            return tzid;
1077        }
1078    }
1079    return U_TZNAME[n];
1080#endif
1081#else
1082    return "";
1083#endif
1084}
1085
1086/* Get and set the ICU data directory --------------------------------------- */
1087
1088static char *gDataDirectory = NULL;
1089#if U_POSIX_LOCALE
1090 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1091#endif
1092
1093static UBool U_CALLCONV putil_cleanup(void)
1094{
1095    if (gDataDirectory && *gDataDirectory) {
1096        uprv_free(gDataDirectory);
1097    }
1098    gDataDirectory = NULL;
1099#if U_POSIX_LOCALE
1100    if (gCorrectedPOSIXLocale) {
1101        uprv_free(gCorrectedPOSIXLocale);
1102        gCorrectedPOSIXLocale = NULL;
1103    }
1104#endif
1105    return TRUE;
1106}
1107
1108/*
1109 * Set the data directory.
1110 *    Make a copy of the passed string, and set the global data dir to point to it.
1111 *    TODO:  see bug #2849, regarding thread safety.
1112 */
1113U_CAPI void U_EXPORT2
1114u_setDataDirectory(const char *directory) {
1115    char *newDataDir;
1116    int32_t length;
1117
1118    if(directory==NULL || *directory==0) {
1119        /* A small optimization to prevent the malloc and copy when the
1120        shared library is used, and this is a way to make sure that NULL
1121        is never returned.
1122        */
1123        newDataDir = (char *)"";
1124    }
1125    else {
1126        length=(int32_t)uprv_strlen(directory);
1127        newDataDir = (char *)uprv_malloc(length + 2);
1128        /* Exit out if newDataDir could not be created. */
1129        if (newDataDir == NULL) {
1130            return;
1131        }
1132        uprv_strcpy(newDataDir, directory);
1133
1134#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1135        {
1136            char *p;
1137            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1138                *p = U_FILE_SEP_CHAR;
1139            }
1140        }
1141#endif
1142    }
1143
1144    umtx_lock(NULL);
1145    if (gDataDirectory && *gDataDirectory) {
1146        uprv_free(gDataDirectory);
1147    }
1148    gDataDirectory = newDataDir;
1149    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1150    umtx_unlock(NULL);
1151}
1152
1153U_CAPI UBool U_EXPORT2
1154uprv_pathIsAbsolute(const char *path)
1155{
1156  if(!path || !*path) {
1157    return FALSE;
1158  }
1159
1160  if(*path == U_FILE_SEP_CHAR) {
1161    return TRUE;
1162  }
1163
1164#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1165  if(*path == U_FILE_ALT_SEP_CHAR) {
1166    return TRUE;
1167  }
1168#endif
1169
1170#if defined(U_WINDOWS)
1171  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1172       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1173      path[1] == ':' ) {
1174    return TRUE;
1175  }
1176#endif
1177
1178  return FALSE;
1179}
1180
1181/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1182   until some client wrapper makefiles are updated */
1183#if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1184# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1185#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1186# endif
1187#endif
1188
1189U_CAPI const char * U_EXPORT2
1190u_getDataDirectory(void) {
1191    const char *path = NULL;
1192#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1193    char datadir_path_buffer[PATH_MAX];
1194#endif
1195
1196    /* if we have the directory, then return it immediately */
1197    UMTX_CHECK(NULL, gDataDirectory, path);
1198
1199    if(path) {
1200        return path;
1201    }
1202
1203    /*
1204    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1205    override ICU's data with the ICU_DATA environment variable. This prevents
1206    problems where multiple custom copies of ICU's specific version of data
1207    are installed on a system. Either the application must define the data
1208    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1209    ICU, set the data with udata_setCommonData or trust that all of the
1210    required data is contained in ICU's data library that contains
1211    the entry point defined by U_ICUDATA_ENTRY_POINT.
1212
1213    There may also be some platforms where environment variables
1214    are not allowed.
1215    */
1216#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1217    /* First try to get the environment variable */
1218    path=getenv("ICU_DATA");
1219#   endif
1220
1221    /* ICU_DATA_DIR may be set as a compile option.
1222     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1223     * and is used only when data is built in archive mode eliminating the need
1224     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1225     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1226     * set their own path.
1227     */
1228#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1229    if(path==NULL || *path==0) {
1230# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1231        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1232# endif
1233# ifdef ICU_DATA_DIR
1234        path=ICU_DATA_DIR;
1235# else
1236        path=U_ICU_DATA_DEFAULT_DIR;
1237# endif
1238# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1239        if (prefix != NULL) {
1240            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1241            path=datadir_path_buffer;
1242        }
1243# endif
1244    }
1245#endif
1246
1247    if(path==NULL) {
1248        /* It looks really bad, set it to something. */
1249        path = "";
1250    }
1251
1252    u_setDataDirectory(path);
1253    return gDataDirectory;
1254}
1255
1256
1257
1258
1259
1260/* Macintosh-specific locale information ------------------------------------ */
1261#ifdef XP_MAC
1262
/*
 * One row of the classic Mac OS locale lookup table. A field holding
 * MAC_LC_MAGIC_NUMBER acts as a wildcard when the table is searched.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;   /* POSIX locale ID reported for a matching row */
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5  /* wildcard: matches any value */
#define MAC_LC_INIT_NUMBER -9   /* "not determined yet" marker for lookups */

/*
 * Rows are searched in order and the first match wins, so the all-wildcard
 * fallback row must stay last. Only date_region is specific in the rows
 * below; the commented-out rows are regions without a known locale ID.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 0, "en_US" },  /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 1, "fr_FR" },  /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 2, "en_GB" },  /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 3, "de_DE" },  /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 4, "it_IT" },  /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 5, "nl_NL" },  /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 6, "fr_BE" },  /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 7, "sv_SE" },  /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 9, "da_DK" },  /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: Hebrew. This row previously carried the Icelandic ID "is_IS". */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" },
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 22, "" },*/  /* Malta */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 23, "" },*/  /* Cyprus */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 33, "" },*/  /* Hindi system for India */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 34, "" },*/  /* Pakistan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 46, "" },*/  /* Lapland  [Ask Rich for the data. HS] */
    /*{ MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 47, "" },*/  /* Faeroe Islands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1362
1363#endif
1364
1365#if U_POSIX_LOCALE
/* Helper shared by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Looks up the POSIX locale ID for
 * LC_CTYPE or LC_MESSAGES; any other category goes straight to the
 * "en_US_POSIX" default. The result is never NULL.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* localeName = NULL;

    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * Ask the C library first, because an application may have set the
        * locale itself. On Solaris two different calls to setlocale can
        * give different values, so callers only fetch this once.
        *
        * LC_ALL can't be queried because its format is platform dependent.
        * The LANG environment variable seems to affect LC_CTYPE by default.
        * Examples of what setlocale(LC_ALL, NULL) can return:
        *   HPUX:    'C C C C C C C'
        *   Solaris: /en_US/C/C/C/C/C on the second try
        *   Linux:   LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Passing an empty string instead
        * of NULL modifies the libc behavior.
        */
        localeName = setlocale(category, NULL);
        if (localeName == NULL
            || uprv_strcmp("C", localeName) == 0
            || uprv_strcmp("POSIX", localeName) == 0)
        {
            /* Maybe we got some garbage. Fall back to the environment:
             * LC_ALL, then the category's own variable, then LANG. */
            localeName = getenv("LC_ALL");
            if (localeName == NULL) {
                localeName = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
            }
            if (localeName == NULL) {
                localeName = getenv("LANG");
            }
        }
    }

    if (localeName == NULL
        || uprv_strcmp("C", localeName) == 0
        || uprv_strcmp("POSIX", localeName) == 0)
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        localeName = "en_US_POSIX";
    }
    return localeName;
}
1416
/* Returns the raw POSIX ID of the default locale as reported for
 * LC_MESSAGES (and indirectly LC_ALL and LANG). The lookup happens on the
 * first call only; later calls return the remembered pointer.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1428
/* Returns the raw POSIX ID used for default codepage detection, as reported
 * for LC_CTYPE (and indirectly LC_ALL and LANG). The lookup happens on the
 * first call only; later calls return the remembered pointer.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char* cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1440#endif
1441
1442/* NOTE: The caller should handle thread safety */
1443U_CAPI const char* U_EXPORT2
1444uprv_getDefaultLocaleID()
1445{
1446#if U_POSIX_LOCALE
1447/*
1448  Note that:  (a '!' means the ID is improper somehow)
1449     LC_ALL  ---->     default_loc          codepage
1450--------------------------------------------------------
1451     ab.CD             ab                   CD
1452     ab@CD             ab__CD               -
1453     ab@CD.EF          ab__CD               EF
1454
1455     ab_CD.EF@GH       ab_CD_GH             EF
1456
1457Some 'improper' ways to do the same as above:
1458  !  ab_CD@GH.EF       ab_CD_GH             EF
1459  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1460  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1461
1462     _CD@GH            _CD_GH               -
1463     _CD.EF@GH         _CD_GH               EF
1464
1465The variant cannot have dots in it.
1466The 'rightmost' variant (@xxx) wins.
1467The leftmost codepage (.xxx) wins.
1468*/
1469    char *correctedPOSIXLocale = 0;
1470    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1471    const char *p;
1472    const char *q;
1473    int32_t len;
1474
1475    /* Format: (no spaces)
1476    ll [ _CC ] [ . MM ] [ @ VV]
1477
1478      l = lang, C = ctry, M = charmap, V = variant
1479    */
1480
1481    if (gCorrectedPOSIXLocale != NULL) {
1482        return gCorrectedPOSIXLocale;
1483    }
1484
1485    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1486        /* assume new locale can't be larger than old one? */
1487        correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1488        /* Exit on memory allocation error. */
1489        if (correctedPOSIXLocale == NULL) {
1490            return NULL;
1491        }
1492        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1493        correctedPOSIXLocale[p-posixID] = 0;
1494
1495        /* do not copy after the @ */
1496        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1497            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1498        }
1499    }
1500
1501    /* Note that we scan the *uncorrected* ID. */
1502    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1503        if (correctedPOSIXLocale == NULL) {
1504            correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1505            /* Exit on memory allocation error. */
1506            if (correctedPOSIXLocale == NULL) {
1507                return NULL;
1508            }
1509            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1510            correctedPOSIXLocale[p-posixID] = 0;
1511        }
1512        p++;
1513
1514        /* Take care of any special cases here.. */
1515        if (!uprv_strcmp(p, "nynorsk")) {
1516            p = "NY";
1517            /* Don't worry about no__NY. In practice, it won't appear. */
1518        }
1519
1520        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1521            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1522        }
1523        else {
1524            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1525        }
1526
1527        if ((q = uprv_strchr(p, '.')) != NULL) {
1528            /* How big will the resulting string be? */
1529            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1530            uprv_strncat(correctedPOSIXLocale, p, q-p);
1531            correctedPOSIXLocale[len] = 0;
1532        }
1533        else {
1534            /* Anything following the @ sign */
1535            uprv_strcat(correctedPOSIXLocale, p);
1536        }
1537
1538        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1539         * How about 'russian' -> 'ru'?
1540         * Many of the other locales using ISO codes will be handled by the
1541         * canonicalization functions in uloc_getDefault.
1542         */
1543    }
1544
1545    /* Was a correction made? */
1546    if (correctedPOSIXLocale != NULL) {
1547        posixID = correctedPOSIXLocale;
1548    }
1549    else {
1550        /* copy it, just in case the original pointer goes away.  See j2395 */
1551        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1552        /* Exit on memory allocation error. */
1553        if (correctedPOSIXLocale == NULL) {
1554            return NULL;
1555        }
1556        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1557    }
1558
1559    if (gCorrectedPOSIXLocale == NULL) {
1560        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1561        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1562        correctedPOSIXLocale = NULL;
1563    }
1564
1565    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1566        uprv_free(correctedPOSIXLocale);
1567    }
1568
1569    return posixID;
1570
1571#elif defined(U_WINDOWS) || defined(U_MINGW)
1572    UErrorCode status = U_ZERO_ERROR;
1573    LCID id = GetThreadLocale();
1574    const char* locID = uprv_convertToPosix(id, &status);
1575
1576    if (U_FAILURE(status)) {
1577        locID = "en_US";
1578    }
1579    return locID;
1580
1581#elif defined(XP_MAC)
1582    int32_t script = MAC_LC_INIT_NUMBER;
1583    /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1584    int32_t region = MAC_LC_INIT_NUMBER;
1585    /* = GetScriptManagerVariable(smRegionCode);*/
1586    int32_t lang = MAC_LC_INIT_NUMBER;
1587    /* = GetScriptManagerVariable(smScriptLang);*/
1588    int32_t date_region = MAC_LC_INIT_NUMBER;
1589    const char* posixID = 0;
1590    int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1591    int32_t i;
1592    Intl1Hndl ih;
1593
1594    ih = (Intl1Hndl) GetIntlResource(1);
1595    if (ih)
1596        date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1597
1598    for (i = 0; i < count; i++) {
1599        if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1600             || (mac_lc_recs[i].script == script))
1601            && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1602             || (mac_lc_recs[i].region == region))
1603            && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1604             || (mac_lc_recs[i].lang == lang))
1605            && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1606             || (mac_lc_recs[i].date_region == date_region))
1607            )
1608        {
1609            posixID = mac_lc_recs[i].posixID;
1610            break;
1611        }
1612    }
1613
1614    return posixID;
1615
1616#elif defined(OS400)
1617    /* locales are process scoped and are by definition thread safe */
1618    static char correctedLocale[64];
1619    const  char *localeID = getenv("LC_ALL");
1620           char *p;
1621
1622    if (localeID == NULL)
1623        localeID = getenv("LANG");
1624    if (localeID == NULL)
1625        localeID = setlocale(LC_ALL, NULL);
1626    /* Make sure we have something... */
1627    if (localeID == NULL)
1628        return "en_US_POSIX";
1629
1630    /* Extract the locale name from the path. */
1631    if((p = uprv_strrchr(localeID, '/')) != NULL)
1632    {
1633        /* Increment p to start of locale name. */
1634        p++;
1635        localeID = p;
1636    }
1637
1638    /* Copy to work location. */
1639    uprv_strcpy(correctedLocale, localeID);
1640
1641    /* Strip off the '.locale' extension. */
1642    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1643        *p = 0;
1644    }
1645
1646    /* Upper case the locale name. */
1647    T_CString_toUpperCase(correctedLocale);
1648
1649    /* See if we are using the POSIX locale.  Any of the
1650    * following are equivalent and use the same QLGPGCMA
1651    * (POSIX) locale.
1652    * QLGPGCMA2 means UCS2
1653    * QLGPGCMA_4 means UTF-32
1654    * QLGPGCMA_8 means UTF-8
1655    */
1656    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1657        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1658        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1659    {
1660        uprv_strcpy(correctedLocale, "en_US_POSIX");
1661    }
1662    else
1663    {
1664        int16_t LocaleLen;
1665
1666        /* Lower case the lang portion. */
1667        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1668        {
1669            *p = uprv_tolower(*p);
1670        }
1671
1672        /* Adjust for Euro.  After '_E' add 'URO'. */
1673        LocaleLen = uprv_strlen(correctedLocale);
1674        if (correctedLocale[LocaleLen - 2] == '_' &&
1675            correctedLocale[LocaleLen - 1] == 'E')
1676        {
1677            uprv_strcat(correctedLocale, "URO");
1678        }
1679
1680        /* If using Lotus-based locale then convert to
1681         * equivalent non Lotus.
1682         */
1683        else if (correctedLocale[LocaleLen - 2] == '_' &&
1684            correctedLocale[LocaleLen - 1] == 'L')
1685        {
1686            correctedLocale[LocaleLen - 2] = 0;
1687        }
1688
1689        /* There are separate simplified and traditional
1690         * locales called zh_HK_S and zh_HK_T.
1691         */
1692        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1693        {
1694            uprv_strcpy(correctedLocale, "zh_HK");
1695        }
1696
1697        /* A special zh_CN_GBK locale...
1698        */
1699        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1700        {
1701            uprv_strcpy(correctedLocale, "zh_CN");
1702        }
1703
1704    }
1705
1706    return correctedLocale;
1707#endif
1708
1709}
1710
1711#if !U_CHARSET_IS_UTF8
1712#if U_POSIX_LOCALE
1713/*
1714Due to various platform differences, one platform may specify a charset,
1715when they really mean a different charset. Remap the names so that they are
1716compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1717here. Before adding anything to this function, please consider adding unique
1718names to the ICU alias table in the data directory.
1719*/
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    /* Without a codepage name there is nothing to remap. */
    if (name == NULL) {
        return NULL;
    }
    /* An empty locale string is treated exactly like a missing locale. */
    if (locale != NULL && locale[0] == 0) {
        locale = NULL;
    }
#if defined(U_AIX)
    if (uprv_strcmp(name, "IBM-943") == 0) {
        /* Use the ASCII compatible ibm-943 */
        return "Shift-JIS";
    }
    if (uprv_strcmp(name, "IBM-1252") == 0) {
        /* Use the windows-1252 that contains the Euro */
        return "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
        /* Solaris underspecifies the "EUC" name; the locale disambiguates it. */
        if (uprv_strcmp(locale, "zh_CN") == 0) {
            return "EUC-CN";
        }
        if (uprv_strcmp(locale, "zh_TW") == 0) {
            return "EUC-TW";
        }
        if (uprv_strcmp(locale, "ko_KR") == 0) {
            return "EUC-KR";
        }
        /* Any other locale keeps the bare "EUC" name. */
        return name;
    }
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-954 is the best match.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    if (uprv_strcmp(name, "646") == 0) {
        /*
         * The default codepage given by Solaris is 646 but the C library
         * routines treat it as if it was ISO-8859-1 instead of US-ASCII(646).
         */
        return "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && name[0] == 0) {
        /*
        No locale was specified, and an empty name was passed in.
        This usually indicates that nl_langinfo didn't return valid information.
        Mac OS X uses UTF-8 by default (especially the locale data and console).
        */
        return "UTF-8";
    }
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
        /* For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. */
        return "UTF-8";
    }
#elif defined(U_BSD)
    if (uprv_strcmp(name, "CP949") == 0) {
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
        return "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        return "hkbig5";
    }
    if (uprv_strcmp(name, "eucJP") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
        /* Linux underspecifies the "EUC" name; the locale disambiguates it. */
        if (uprv_strcmp(locale, "korean") == 0) {
            return "EUC-KR";
        }
        if (uprv_strcmp(locale, "japanese") == 0) {
            /* Same mapping as the eucjp case below. */
            return "eucjis";
        }
        /* Any other locale keeps the bare "euc" name. */
        return name;
    }
    if (uprv_strcmp(name, "eucjp") == 0) {
        /*
        ibm-1350 is the best match, but unavailable.
        ibm-954 is mostly a superset of ibm-1350.
        ibm-33722 is the default for eucJP (similar to Windows).
        */
        return "eucjis";
    }
    if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
        /* For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. */
        return "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif
    /* An empty name ("") is reported to the caller as NULL. */
    return (name[0] == 0) ? NULL : name;
}
1841
/*
 * Extract the codepage part of a POSIX locale ID of the form
 * "locale.codepage[@variant]".
 *
 * The codepage text is copied into buffer (at most buffCapacity bytes, always
 * NUL-terminated), any "@variant" suffix is cut off, and the result is passed
 * through remapPlatformDependentCodepage() together with the locale part.
 *
 * Returns NULL when localeName is NULL, contains no '.', or the remapping
 * yields NULL; otherwise returns the remapped codepage name.
 */
static const char*
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
{
    char localeBuf[100];
    const char *dot;
    const char *codepage;
    char *atSign;
    size_t localeCapacity;

    if (localeName == NULL) {
        return NULL;
    }
    dot = uprv_strchr(localeName, '.');
    if (dot == NULL) {
        /* No codepage was specified in the locale ID. */
        return NULL;
    }

    /* Copy the part before the '.'; the '.' slot (or the last byte) becomes the terminator. */
    localeCapacity = uprv_min(sizeof(localeBuf), (dot-localeName)+1);
    uprv_strncpy(localeBuf, localeName, localeCapacity);
    localeBuf[localeCapacity-1] = 0;

    /* Copy the part after the '.' into the caller's buffer. */
    codepage = uprv_strncpy(buffer, dot+1, buffCapacity);
    buffer[buffCapacity-1] = 0;

    /* Drop a trailing "@variant", if any. */
    atSign = uprv_strchr(codepage, '@');
    if (atSign != NULL) {
        *atSign = 0;
    }

    return remapPlatformDependentCodepage(localeBuf, codepage);
}
1862#endif
1863
/*
 * Determine the name of the platform's default codepage.
 *
 * The strategy depends on the build platform:
 *  - OS400:    queries the job CCSID and returns "ibm-<ccsid>" (ibm-37 if unknown).
 *  - OS390:    returns nl_langinfo(CODESET) with UCNV_SWAP_LFNL_OPTION_STRING appended.
 *  - XP_MAC:   returns "macintosh".
 *  - Windows:  returns "windows-<ANSI code page>".
 *  - POSIX:    tries nl_langinfo() first (when available), then the codepage
 *              part of the POSIX locale ID, and finally "US-ASCII".
 *  - otherwise returns "US-ASCII".
 *
 * The result points either to a string literal or to a function-static
 * buffer, so it must not be freed by the caller.
 */
static const char*
int_getDefaultCodepage()
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* ccsid is unsigned, so it must be formatted with an unsigned conversion. */
    sprintf(codepage,"ibm-%u", (unsigned int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room left for the codeset name once the option suffix and NUL are reserved. */
    size_t baseCapacity = 63-strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), baseCapacity);
    /* strncpy does not terminate a truncated copy; strcat needs a terminator. */
    codepage[baseCapacity] = 0;
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NULL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* GetACP() returns an unsigned value, so use an unsigned conversion. */
    sprintf(codepage, "windows-%u", (unsigned int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII.
         * A NULL locale ID is handled like the C/POSIX case instead of being
         * passed to uprv_strcmp().
         */
        if (localeName != NULL && uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            codeset = remapPlatformDependentCodepage(localeName, codeset);
        } else
#endif
        {
            codeset = remapPlatformDependentCodepage(NULL, codeset);
        }

        if (codeset != NULL) {
            /* Copy the name because nl_langinfo's storage may be reused. */
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1959
1960
1961U_CAPI const char*  U_EXPORT2
1962uprv_getDefaultCodepage()
1963{
1964    static char const  *name = NULL;
1965    umtx_lock(NULL);
1966    if (name == NULL) {
1967        name = int_getDefaultCodepage();
1968    }
1969    umtx_unlock(NULL);
1970    return name;
1971}
1972#endif  /* !U_CHARSET_IS_UTF8 */
1973
1974
1975/* end of platform-specific implementation -------------- */
1976
1977/* version handling --------------------------------------------------------- */
1978
1979U_CAPI void U_EXPORT2
1980u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1981    char *end;
1982    uint16_t part=0;
1983
1984    if(versionArray==NULL) {
1985        return;
1986    }
1987
1988    if(versionString!=NULL) {
1989        for(;;) {
1990            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1991            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1992                break;
1993            }
1994            versionString=end+1;
1995        }
1996    }
1997
1998    while(part<U_MAX_VERSION_LENGTH) {
1999        versionArray[part++]=0;
2000    }
2001}
2002
2003U_CAPI void U_EXPORT2
2004u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2005    if(versionArray!=NULL && versionString!=NULL) {
2006        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2007        int32_t len = u_strlen(versionString);
2008        if(len>U_MAX_VERSION_STRING_LENGTH) {
2009            len = U_MAX_VERSION_STRING_LENGTH;
2010        }
2011        u_UCharsToChars(versionString, versionChars, len);
2012        versionChars[len]=0;
2013        u_versionFromString(versionArray, versionChars);
2014    }
2015}
2016
2017U_CAPI void U_EXPORT2
2018u_versionToString(UVersionInfo versionArray, char *versionString) {
2019    uint16_t count, part;
2020    uint8_t field;
2021
2022    if(versionString==NULL) {
2023        return;
2024    }
2025
2026    if(versionArray==NULL) {
2027        versionString[0]=0;
2028        return;
2029    }
2030
2031    /* count how many fields need to be written */
2032    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2033    }
2034
2035    if(count <= 1) {
2036        count = 2;
2037    }
2038
2039    /* write the first part */
2040    /* write the decimal field value */
2041    field=versionArray[0];
2042    if(field>=100) {
2043        *versionString++=(char)('0'+field/100);
2044        field%=100;
2045    }
2046    if(field>=10) {
2047        *versionString++=(char)('0'+field/10);
2048        field%=10;
2049    }
2050    *versionString++=(char)('0'+field);
2051
2052    /* write the following parts */
2053    for(part=1; part<count; ++part) {
2054        /* write a dot first */
2055        *versionString++=U_VERSION_DELIMITER;
2056
2057        /* write the decimal field value */
2058        field=versionArray[part];
2059        if(field>=100) {
2060            *versionString++=(char)('0'+field/100);
2061            field%=100;
2062        }
2063        if(field>=10) {
2064            *versionString++=(char)('0'+field/10);
2065            field%=10;
2066        }
2067        *versionString++=(char)('0'+field);
2068    }
2069
2070    /* NUL-terminate */
2071    *versionString=0;
2072}
2073
/*
 * Fill versionArray with the version described by the U_ICU_VERSION string,
 * parsed by u_versionFromString(). A NULL versionArray is ignored by that
 * function.
 */
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray) {
    u_versionFromString(versionArray, U_ICU_VERSION);
}
2078
2079/**
2080 * icucfg.h dependent code
2081 */
2082
2083#if U_ENABLE_DYLOAD
2084
2085#if defined(U_CHECK_DYLOAD)
2086
2087#if defined(HAVE_DLOPEN)
2088
2089#ifdef HAVE_DLFCN_H
2090#ifdef __MVS__
2091#ifndef __SUSV3
2092#define __SUSV3 1
2093#endif
2094#endif
2095#include <dlfcn.h>
2096#endif
2097
2098U_INTERNAL void * U_EXPORT2
2099uprv_dl_open(const char *libName, UErrorCode *status) {
2100  void *ret = NULL;
2101  if(U_FAILURE(*status)) return ret;
2102  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2103  if(ret==NULL) {
2104#ifndef U_TRACE_DYLOAD
2105    perror("dlopen");
2106#endif
2107    *status = U_MISSING_RESOURCE_ERROR;
2108  }
2109  return ret;
2110}
2111
2112U_INTERNAL void U_EXPORT2
2113uprv_dl_close(void *lib, UErrorCode *status) {
2114  if(U_FAILURE(*status)) return;
2115  dlclose(lib);
2116}
2117
2118U_INTERNAL UVoidFunction* U_EXPORT2
2119uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2120  union {
2121      void* voidPtr;
2122      UVoidFunction* voidFunc;
2123  } ret;
2124  ret.voidPtr = NULL;
2125  if(U_FAILURE(*status)) return NULL;
2126  /*
2127   * ISO forbids the following cast, but it's needed for dlsym.
2128   *  See: http://pubs.opengroup.org/onlinepubs/009695399/functions/dlsym.html
2129   *  See: http://www.trilithium.com/johan/2004/12/problem-with-dlsym/
2130   */
2131  ret.voidPtr = dlsym(lib, sym);
2132  if(ret.voidPtr == NULL) {
2133    *status = U_MISSING_RESOURCE_ERROR;
2134  }
2135  return ret.voidFunc;
2136}
2137
2138#else
2139
2140/* null (nonexistent) implementation. */
2141
2142U_INTERNAL void * U_EXPORT2
2143uprv_dl_open(const char *libName, UErrorCode *status) {
2144  if(U_FAILURE(*status)) return NULL;
2145  *status = U_UNSUPPORTED_ERROR;
2146  return NULL;
2147}
2148
2149U_INTERNAL void U_EXPORT2
2150uprv_dl_close(void *lib, UErrorCode *status) {
2151  if(U_FAILURE(*status)) return;
2152  *status = U_UNSUPPORTED_ERROR;
2153  return;
2154}
2155
2156
2157U_INTERNAL UVoidFunction* U_EXPORT2
2158uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2159  if(U_SUCCESS(*status)) {
2160    *status = U_UNSUPPORTED_ERROR;
2161  }
2162  return (UVoidFunction*)NULL;
2163}
2164
2165
2166
2167#endif
2168
2169#elif defined U_WINDOWS
2170
2171U_INTERNAL void * U_EXPORT2
2172uprv_dl_open(const char *libName, UErrorCode *status) {
2173  HMODULE lib = NULL;
2174
2175  if(U_FAILURE(*status)) return NULL;
2176
2177  lib = LoadLibraryA(libName);
2178
2179  if(lib==NULL) {
2180    *status = U_MISSING_RESOURCE_ERROR;
2181  }
2182
2183  return (void*)lib;
2184}
2185
2186U_INTERNAL void U_EXPORT2
2187uprv_dl_close(void *lib, UErrorCode *status) {
2188  HMODULE handle = (HMODULE)lib;
2189  if(U_FAILURE(*status)) return;
2190
2191  FreeLibrary(handle);
2192
2193  return;
2194}
2195
2196
2197U_INTERNAL UVoidFunction* U_EXPORT2
2198uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2199  HMODULE handle = (HMODULE)lib;
2200  UVoidFunction* addr = NULL;
2201
2202  if(U_FAILURE(*status) || lib==NULL) return NULL;
2203
2204  addr = (UVoidFunction*)GetProcAddress(handle, sym);
2205
2206  if(addr==NULL) {
2207    DWORD lastError = GetLastError();
2208    if(lastError == ERROR_PROC_NOT_FOUND) {
2209      *status = U_MISSING_RESOURCE_ERROR;
2210    } else {
2211      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2212    }
2213  }
2214
2215  return addr;
2216}
2217
2218
2219#else
2220
2221/* No dynamic loading set. */
2222
2223U_INTERNAL void * U_EXPORT2
2224uprv_dl_open(const char *libName, UErrorCode *status) {
2225    if(U_FAILURE(*status)) return NULL;
2226    *status = U_UNSUPPORTED_ERROR;
2227    return NULL;
2228}
2229
2230U_INTERNAL void U_EXPORT2
2231uprv_dl_close(void *lib, UErrorCode *status) {
2232    if(U_FAILURE(*status)) return;
2233    *status = U_UNSUPPORTED_ERROR;
2234    return;
2235}
2236
2237
2238U_INTERNAL UVoidFunction* U_EXPORT2
2239uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2240  if(U_SUCCESS(*status)) {
2241    *status = U_UNSUPPORTED_ERROR;
2242  }
2243  return (UVoidFunction*)NULL;
2244}
2245
2246
2247#endif
2248
2249#endif /* U_ENABLE_DYLOAD */
2250
2251/*
2252 * Hey, Emacs, please set the following:
2253 *
2254 * Local Variables:
2255 * indent-tabs-mode: nil
2256 * End:
2257 *
2258 */
2259