1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
10*
11*   Date        Name        Description
12*   04/14/97    aliu        Creation.
13*   04/24/97    aliu        Added getDefaultDataDirectory() and
14*                            getDefaultLocaleID().
15*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
16*                            for assumed case.  Non-UNIX platforms must be
17*                            special-cased.  Rewrote numeric methods dealing
18*                            with NaN and Infinity to be platform independent
19*                             over all IEEE 754 platforms.
20*   05/13/97    aliu        Restored sign of timezone
21*                            (semantics are hours West of GMT)
22*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
23*                             nextDouble..
24*   07/22/98    stephen     Added remainder, max, min, trunc
25*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
26*   08/24/98    stephen     Added longBitsFromDouble
27*   09/08/98    stephen     Minor changes for Mac Port
28*   03/02/99    stephen     Removed openFile().  Added AS400 support.
29*                            Fixed EBCDIC tables
30*   04/15/99    stephen     Converted to C.
31*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
32*   08/04/99    jeffrey R.  Added OS/2 changes
33*   11/15/99    helena      Integrated S/390 IEEE support.
34*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
35*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
36*   01/03/08    Steven L.   Fake Time Support
37******************************************************************************
38*/
39
40/* Define _XOPEN_SOURCE for Solaris and friends. */
41/* NetBSD needs it to be >= 4 */
42#if !defined(_XOPEN_SOURCE)
43#if __STDC_VERSION__ >= 199901L
44/* It is invalid to compile an XPG3, XPG4, XPG4v2 or XPG5 application using c99 on Solaris */
45#define _XOPEN_SOURCE 600
46#else
47#define _XOPEN_SOURCE 4
48#endif
49#endif
50
51/* Make sure things like readlink and such functions work.
52Poorly upgraded Solaris machines can't have this defined.
53Cleanly installed Solaris can use this #define.
54*/
55#if !defined(_XOPEN_SOURCE_EXTENDED) && ((!defined(__STDC_VERSION__) || __STDC_VERSION__ >= 199901L) || defined(__xlc__))
56#define _XOPEN_SOURCE_EXTENDED 1
57#endif
58
59/* include ICU headers */
60#include "unicode/utypes.h"
61#include "unicode/putil.h"
62#include "unicode/ustring.h"
63#include "putilimp.h"
64#include "uassert.h"
65#include "umutex.h"
66#include "cmemory.h"
67#include "cstring.h"
68#include "locmap.h"
69#include "ucln_cmn.h"
70
71/* Include standard headers. */
72#include <stdio.h>
73#include <stdlib.h>
74#include <string.h>
75#include <math.h>
76#include <locale.h>
77#include <float.h>
78#include <time.h>
79
80/* include system headers */
81#ifdef U_WINDOWS
82#   define WIN32_LEAN_AND_MEAN
83#   define VC_EXTRALEAN
84#   define NOUSER
85#   define NOSERVICE
86#   define NOIME
87#   define NOMCX
88#   include <windows.h>
89#   include "wintz.h"
90#elif defined(U_CYGWIN) && defined(__STRICT_ANSI__)
91/* tzset isn't defined in strict ANSI on Cygwin. */
92#   undef __STRICT_ANSI__
93#elif defined(OS400)
94#   include <float.h>
95#   include <qusec.h>       /* error code structure */
96#   include <qusrjobi.h>
97#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
98#   include <mih/testptr.h> /* For uprv_maximumPtr */
99#elif defined(XP_MAC)
100#   include <Files.h>
101#   include <IntlResources.h>
102#   include <Script.h>
103#   include <Folders.h>
104#   include <MacTypes.h>
105#   include <TextUtils.h>
106#   define ICU_NO_USER_DATA_OVERRIDE 1
107#elif defined(OS390)
108#include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
109#elif defined(U_DARWIN) || defined(U_LINUX) || defined(U_BSD)
110#include <limits.h>
111#include <unistd.h>
112#elif defined(U_QNX)
113#include <sys/neutrino.h>
114#elif defined(U_SOLARIS)
115# ifndef _XPG4_2
116#  define _XPG4_2
117# endif
118#endif
119
120
121#if defined(U_DARWIN)
122#include <TargetConditionals.h>
123#endif
124
125#ifndef U_WINDOWS
126#include <sys/time.h>
127#endif
128
129/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
132 *
133 */
134
135#if U_HAVE_NL_LANGINFO_CODESET
136#include <langinfo.h>
137#endif
138
139/**
140 * Simple things (presence of functions, etc) should just go in configure.in and be added to
141 * icucfg.h via autoheader.
142 */
143#if defined(HAVE_CONFIG_H)
144#include "icucfg.h"
145#endif
146
147/* Define the extension for data files, again... */
148#define DATA_TYPE "dat"
149
150/* Leave this copyright notice here! */
151static const char copyright[] = U_COPYRIGHT_STRING;
152
153/* floating point implementations ------------------------------------------- */
154
/* Mask for the sign bit of a 32-bit word; used below on the upper 32 bits of a double. */
#define SIGN 0x80000000U

/*
 * Make it easy to define certain types of constants: the same 64 bits can be
 * written as an integer bit pattern and read back as a double.
 */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern handed out by uprv_getNaN(). We return QNAN rather than SNAN. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern handed out by uprv_getInfinity(); the NaN and infinity tests compare against it. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
165
166/*---------------------------------------------------------------------------
167  Platform utilities
168  Our general strategy is to assume we're on a POSIX platform.  Platforms which
169  are non-POSIX must declare themselves so.  The default POSIX implementation
170  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
171  functions).
172  ---------------------------------------------------------------------------*/
173
174#if defined(U_WINDOWS) || defined(XP_MAC) || defined(OS400)
175#   undef U_POSIX_LOCALE
176#else
177#   define U_POSIX_LOCALE    1
178#endif
179
180/*
181    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
182    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
183*/
184#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian machines those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
194
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian machines those are the last n bytes of the object; otherwise
 * they are the first bytes.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
204#endif   /* !IEEE_754 */
205
206#if IEEE_754
207static UBool
208u_signBit(double d) {
209    uint8_t hiByte;
210#if U_IS_BIG_ENDIAN
211    hiByte = *(uint8_t *)&d;
212#else
213    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
214#endif
215    return (hiByte & 0x80) != 0;
216}
217#endif
218
219
220
221#if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function.
 * The values below are filled in once, on the first call to getUTCtime_fake().
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from (parsed from U_FAKETIME_START) **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time), in milliseconds **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
static UMTX fakeClockMutex = NULL; /* Held while the fake clock is initialized */
229
230static UDate getUTCtime_real() {
231    struct timeval posixTime;
232    gettimeofday(&posixTime, NULL);
233    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
234}
235
236static UDate getUTCtime_fake() {
237    umtx_lock(&fakeClockMutex);
238    if(!fakeClock_set) {
239        UDate real = getUTCtime_real();
240        const char *fake_start = getenv("U_FAKETIME_START");
241        if((fake_start!=NULL) && (fake_start[0]!=0)) {
242            sscanf(fake_start,"%lf",&fakeClock_t0);
243            fakeClock_dt = fakeClock_t0 - real;
244            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
245                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
246                    fakeClock_t0, fake_start, fakeClock_dt, real);
247        } else {
248          fakeClock_dt = 0;
249            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
250                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
251        }
252        fakeClock_set = TRUE;
253    }
254    umtx_unlock(&fakeClockMutex);
255
256    return getUTCtime_real() + fakeClock_dt;
257}
258#endif
259
260#if defined(U_WINDOWS)
/* Overlays a FILETIME with a 64-bit integer so the timestamp can be used in arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond */
#define HECTONANOSECOND_PER_MILLISECOND   10000
269
270#endif
271
272/*---------------------------------------------------------------------------
273  Universal Implementations
274  These are designed to work on all platforms.  Try these, and if they
275  don't work on your platform, then special case your platform with new
276  implementations.
277---------------------------------------------------------------------------*/
278
279U_CAPI UDate U_EXPORT2
280uprv_getUTCtime()
281{
282#if defined(U_DEBUG_FAKETIME)
283    return getUTCtime_fake(); /* Hook for overriding the clock */
284#else
285    return uprv_getRawUTCtime();
286#endif
287}
288
289/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
290U_CAPI UDate U_EXPORT2
291uprv_getRawUTCtime()
292{
293#if defined(XP_MAC)
294    time_t t, t1, t2;
295    struct tm tmrec;
296
297    uprv_memset( &tmrec, 0, sizeof(tmrec) );
298    tmrec.tm_year = 70;
299    tmrec.tm_mon = 0;
300    tmrec.tm_mday = 1;
301    t1 = mktime(&tmrec);    /* seconds of 1/1/1970*/
302
303    time(&t);
304    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
305    t2 = mktime(&tmrec);    /* seconds of current GMT*/
306    return (UDate)(t2 - t1) * U_MILLIS_PER_SECOND;         /* GMT (or UTC) in seconds since 1970*/
307#elif defined(U_WINDOWS)
308
309    FileTimeConversion winTime;
310    GetSystemTimeAsFileTime(&winTime.fileTime);
311    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
312#else
313
314#if defined(HAVE_GETTIMEOFDAY)
315    struct timeval posixTime;
316    gettimeofday(&posixTime, NULL);
317    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
318#else
319    time_t epochtime;
320    time(&epochtime);
321    return (UDate)epochtime * U_MILLIS_PER_SECOND;
322#endif
323
324#endif
325}
326
327/*-----------------------------------------------------------------------------
328  IEEE 754
329  These methods detect and return NaN and infinity values for doubles
330  conforming to IEEE 754.  Platforms which support this standard include X86,
331  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
332  If this doesn't work on your platform, you have non-IEEE floating-point, and
333  will need to code your own versions.  A naive implementation is to return 0.0
334  for getNaN and getInfinity, and false for isNaN and isInfinite.
335  ---------------------------------------------------------------------------*/
336
337U_CAPI UBool U_EXPORT2
338uprv_isNaN(double number)
339{
340#if IEEE_754
341    BitPatternConversion convertedNumber;
342    convertedNumber.d64 = number;
343    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
344    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
345
346#elif defined(OS390)
347    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
348                        sizeof(uint32_t));
349    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
350                        sizeof(uint32_t));
351
352    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
353      (lowBits == 0x00000000L);
354
355#else
356    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
357    /* you'll need to replace this default implementation with what's correct*/
358    /* for your platform.*/
359    return number != number;
360#endif
361}
362
363U_CAPI UBool U_EXPORT2
364uprv_isInfinite(double number)
365{
366#if IEEE_754
367    BitPatternConversion convertedNumber;
368    convertedNumber.d64 = number;
369    /* Infinity is exactly 0x7FF0000000000000U. */
370    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
371#elif defined(OS390)
372    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
373                        sizeof(uint32_t));
374    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
375                        sizeof(uint32_t));
376
377    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
378
379#else
380    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
381    /* value, you'll need to replace this default implementation with what's*/
382    /* correct for your platform.*/
383    return number == (2.0 * number);
384#endif
385}
386
387U_CAPI UBool U_EXPORT2
388uprv_isPositiveInfinity(double number)
389{
390#if IEEE_754 || defined(OS390)
391    return (UBool)(number > 0 && uprv_isInfinite(number));
392#else
393    return uprv_isInfinite(number);
394#endif
395}
396
397U_CAPI UBool U_EXPORT2
398uprv_isNegativeInfinity(double number)
399{
400#if IEEE_754 || defined(OS390)
401    return (UBool)(number < 0 && uprv_isInfinite(number));
402
403#else
404    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
405                        sizeof(uint32_t));
406    return((highBits & SIGN) && uprv_isInfinite(number));
407
408#endif
409}
410
411U_CAPI double U_EXPORT2
412uprv_getNaN()
413{
414#if IEEE_754 || defined(OS390)
415    return gNan.d64;
416#else
417    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
418    /* you'll need to replace this default implementation with what's correct*/
419    /* for your platform.*/
420    return 0.0;
421#endif
422}
423
424U_CAPI double U_EXPORT2
425uprv_getInfinity()
426{
427#if IEEE_754 || defined(OS390)
428    return gInf.d64;
429#else
430    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
431    /* value, you'll need to replace this default implementation with what's*/
432    /* correct for your platform.*/
433    return 0.0;
434#endif
435}
436
437U_CAPI double U_EXPORT2
438uprv_floor(double x)
439{
440    return floor(x);
441}
442
443U_CAPI double U_EXPORT2
444uprv_ceil(double x)
445{
446    return ceil(x);
447}
448
449U_CAPI double U_EXPORT2
450uprv_round(double x)
451{
452    return uprv_floor(x + 0.5);
453}
454
455U_CAPI double U_EXPORT2
456uprv_fabs(double x)
457{
458    return fabs(x);
459}
460
461U_CAPI double U_EXPORT2
462uprv_modf(double x, double* y)
463{
464    return modf(x, y);
465}
466
467U_CAPI double U_EXPORT2
468uprv_fmod(double x, double y)
469{
470    return fmod(x, y);
471}
472
473U_CAPI double U_EXPORT2
474uprv_pow(double x, double y)
475{
476    /* This is declared as "double pow(double x, double y)" */
477    return pow(x, y);
478}
479
480U_CAPI double U_EXPORT2
481uprv_pow10(int32_t x)
482{
483    return pow(10.0, (double)x);
484}
485
486U_CAPI double U_EXPORT2
487uprv_fmax(double x, double y)
488{
489#if IEEE_754
490    /* first handle NaN*/
491    if(uprv_isNaN(x) || uprv_isNaN(y))
492        return uprv_getNaN();
493
494    /* check for -0 and 0*/
495    if(x == 0.0 && y == 0.0 && u_signBit(x))
496        return y;
497
498#endif
499
500    /* this should work for all flt point w/o NaN and Inf special cases */
501    return (x > y ? x : y);
502}
503
504U_CAPI double U_EXPORT2
505uprv_fmin(double x, double y)
506{
507#if IEEE_754
508    /* first handle NaN*/
509    if(uprv_isNaN(x) || uprv_isNaN(y))
510        return uprv_getNaN();
511
512    /* check for -0 and 0*/
513    if(x == 0.0 && y == 0.0 && u_signBit(y))
514        return y;
515
516#endif
517
518    /* this should work for all flt point w/o NaN and Inf special cases */
519    return (x > y ? y : x);
520}
521
522/**
523 * Truncates the given double.
524 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
525 * This is different than calling floor() or ceil():
526 * floor(3.3) = 3, floor(-3.3) = -4
527 * ceil(3.3) = 4, ceil(-3.3) = -3
528 */
529U_CAPI double U_EXPORT2
530uprv_trunc(double d)
531{
532#if IEEE_754
533    /* handle error cases*/
534    if(uprv_isNaN(d))
535        return uprv_getNaN();
536    if(uprv_isInfinite(d))
537        return uprv_getInfinity();
538
539    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
540        return ceil(d);
541    else
542        return floor(d);
543
544#else
545    return d >= 0 ? floor(d) : ceil(d);
546
547#endif
548}
549
550/**
551 * Return the largest positive number that can be represented by an integer
552 * type of arbitrary bit length.
553 */
554U_CAPI double U_EXPORT2
555uprv_maxMantissa(void)
556{
557    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
558}
559
560U_CAPI double U_EXPORT2
561uprv_log(double d)
562{
563    return log(d);
564}
565
566U_CAPI void * U_EXPORT2
567uprv_maximumPtr(void * base)
568{
569#if defined(OS400)
570    /*
571     * With the provided function we should never be out of range of a given segment
572     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
573     * id and 3 bytes for the offset.  The key is that the casting takes care of
574     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
575     * seen in a program is x001000 and when casted to an int would be 0.
576     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
577     *
578     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
579     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
580     * This function determines the activation based on the pointer that is passed in and
581     * calculates the appropriate maximum available size for
582     * each pointer type (TERASPACE and non-TERASPACE)
583     *
584     * Unlike other operating systems, the pointer model isn't determined at
585     * compile time on i5/OS.
586     */
587    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
588        /* if it is a TERASPACE pointer the max is 2GB - 4k */
589        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
590    }
591    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
592    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
593
594#else
595    return U_MAX_PTR(base);
596#endif
597}
598
599/*---------------------------------------------------------------------------
600  Platform-specific Implementations
601  Try these, and if they don't work on your platform, then special case your
602  platform with new implementations.
603  ---------------------------------------------------------------------------*/
604
605/* Generic time zone layer -------------------------------------------------- */
606
607/* Time zone utilities */
608U_CAPI void U_EXPORT2
609uprv_tzset()
610{
611#ifdef U_TZSET
612    U_TZSET();
613#else
614    /* no initialization*/
615#endif
616}
617
618U_CAPI int32_t U_EXPORT2
619uprv_timezone()
620{
621#ifdef U_TIMEZONE
622    return U_TIMEZONE;
623#else
624    time_t t, t1, t2;
625    struct tm tmrec;
626#ifndef U_IOS
627    UBool dst_checked;
628#endif
629    int32_t tdiff = 0;
630
631    time(&t);
632    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
633#ifndef U_IOS
634    dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
635#endif
636    t1 = mktime(&tmrec);                 /* local time in seconds*/
637    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
638    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
639    tdiff = t2 - t1;
640#ifndef U_IOS
641    /* On iOS the calculated tdiff is correct so and doesn't need this dst
642       shift applied. */
643    /* imitate NT behaviour, which returns same timezone offset to GMT for
644       winter and summer*/
645    if (dst_checked)
646        tdiff += 3600;
647#endif
648    return tdiff;
649#endif
650}
651
652/* Note that U_TZNAME does *not* have to be tzname, but if it is,
653   some platforms need to have it declared here. */
654
655#if defined(U_TZNAME) && (defined(U_IRIX) || defined(U_DARWIN) || defined(U_CYGWIN))
656/* RS6000 and others reject char **tzname.  */
657extern U_IMPORT char *U_TZNAME[];
658#endif
659
#if !UCONFIG_NO_FILE_IO && ((defined(U_DARWIN) && !defined(U_IOS)) || defined(U_LINUX) || defined(U_BSD))
/* These platforms are likely to use Olson timezone IDs. */
#define CHECK_LOCALTIME_LINK 1
#if defined(U_DARWIN)
/* On Darwin the zoneinfo directory is taken from TZDIR in <tzfile.h>. */
#include <tzfile.h>
#define TZZONEINFO      (TZDIR "/")
#else
/* File describing the system default zone, and the root of the zoneinfo tree. */
#define TZDEFAULT       "/etc/localtime"
#define TZZONEINFO      "/usr/share/zoneinfo/"
#endif
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
   symlinked to /etc/localtime, which makes searchForTZFile return
   'localtime' when it's the first match. */
#define TZFILE_SKIP2    "localtime"
/* Enables the directory search code, which is compiled under #ifdef SEARCH_TZFILE. */
#define SEARCH_TZFILE
#include <dirent.h>  /* Needed to search through system timezone files */
#endif
/* NOTE(review): presumably these hold the detected default zone name; confirm against the code that fills them in. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
#endif
682
683#ifndef U_WINDOWS
684#define isNonDigit(ch) (ch < '0' || '9' < ch)
685static UBool isValidOlsonID(const char *id) {
686    int32_t idx = 0;
687
688    /* Determine if this is something like Iceland (Olson ID)
689    or AST4ADT (non-Olson ID) */
690    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
691        idx++;
692    }
693
694    /* If we went through the whole string, then it might be okay.
695    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
696    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
697    The rest of the time it could be an Olson ID. George */
698    return (UBool)(id[idx] == 0
699        || uprv_strcmp(id, "PST8PDT") == 0
700        || uprv_strcmp(id, "MST7MDT") == 0
701        || uprv_strcmp(id, "CST6CDT") == 0
702        || uprv_strcmp(id, "EST5EDT") == 0);
703}
704
705/* On some Unix-like OS, 'posix' subdirectory in
706   /usr/share/zoneinfo replicates the top-level contents. 'right'
707   subdirectory has the same set of files, but individual files
708   are different from those in the top-level directory or 'posix'
709   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
710   has files for UTC.
711   When the first match for /etc/localtime is in either of them
712   (usually in posix because 'right' has different file contents),
713   or TZ environment variable points to one of them, createTimeZone
714   fails because, say, 'posix/America/New_York' is not an Olson
715   timezone id ('America/New_York' is). So, we have to skip
716   'posix/' and 'right/' at the beginning. */
/* Advances *id past a leading "posix/" or "right/"; other ids are left untouched. */
static void skipZoneIDPrefix(const char** id) {
    const char *current = *id;

    if (uprv_strncmp(current, "posix/", 6) != 0
        && uprv_strncmp(current, "right/", 6) != 0)
    {
        return;
    }
    /* Both prefixes are six characters long. */
    *id = current + 6;
}
724#endif
725
726#if defined(U_TZNAME) && !defined(U_WINDOWS)
727
728#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the abbreviation-to-Olson-ID table below. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* 1=daylight in June, 2=daylight in December*/
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};

/*#define DEBUG_TZNAME*/

/*
 * Looks up the Olson ID for a pair of zone abbreviations.
 *
 * stdID / dstID   standard and daylight abbreviations to look for
 * daylightType    0, 1 or 2, see OffsetZoneMapping.daylightType
 * offset          zone offset in seconds, compared with offsetSeconds
 *
 * Returns the olsonID of the first row of OFFSET_ZONE_MAPPINGS matching all
 * four values, or NULL when no row matches or an abbreviation is NULL.
 */
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
{
    /* Cast the quotient, not just sizeof(), so the loop compares signed with signed. */
    const int32_t mappingCount = (int32_t)(sizeof(OFFSET_ZONE_MAPPINGS)/sizeof(OFFSET_ZONE_MAPPINGS[0]));
    int32_t idx;

    /* strcmp() must not be handed a NULL pointer. */
    if (stdID == NULL || dstID == NULL) {
        return NULL;
    }
#ifdef DEBUG_TZNAME
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
#endif
    for (idx = 0; idx < mappingCount; idx++)
    {
        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
        {
            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
        }
    }
    return NULL;
}
828#endif
829
830#ifdef SEARCH_TZFILE
#define MAX_PATH_SIZE PATH_MAX /* Set the limit for the size of the path. */
#define MAX_READ_SIZE 512      /* Chunk size used when reading a candidate file for comparison. */

/*
 * State shared between successive compareBinaryFiles() calls so that the
 * default TZ file is opened, measured and read only once.
 */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;     /* contents of the default TZ file; NULL until first read */
    int64_t defaultTZFileSize; /* size of the default TZ file; 0 until measured */
    FILE* defaultTZFilePtr;    /* open handle on the default TZ file; NULL until opened */
    UBool defaultTZstatus;     /* NOTE(review): not used in the code visible here - confirm its meaning */
    int32_t defaultTZPosition; /* offset into defaultTZBuffer during a comparison */
} DefaultTZInfo;
841
842/*
843 * This method compares the two files given to see if they are a match.
844 * It is currently use to compare two TZ files.
845 */
846static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
847    FILE* file;
848    int64_t sizeFile;
849    int64_t sizeFileLeft;
850    int32_t sizeFileRead;
851    int32_t sizeFileToRead;
852    char bufferFile[MAX_READ_SIZE];
853    UBool result = TRUE;
854
855    if (tzInfo->defaultTZFilePtr == NULL) {
856        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
857    }
858    file = fopen(TZFileName, "r");
859
860    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
861
862    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
863        /* First check that the file size are equal. */
864        if (tzInfo->defaultTZFileSize == 0) {
865            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
866            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
867        }
868        fseek(file, 0, SEEK_END);
869        sizeFile = ftell(file);
870        sizeFileLeft = sizeFile;
871
872        if (sizeFile != tzInfo->defaultTZFileSize) {
873            result = FALSE;
874        } else {
875            /* Store the data from the files in seperate buffers and
876             * compare each byte to determine equality.
877             */
878            if (tzInfo->defaultTZBuffer == NULL) {
879                rewind(tzInfo->defaultTZFilePtr);
880                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
881                fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
882            }
883            rewind(file);
884            while(sizeFileLeft > 0) {
885                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
886                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
887
888                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
889                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
890                    result = FALSE;
891                    break;
892                }
893                sizeFileLeft -= sizeFileRead;
894                tzInfo->defaultTZPosition += sizeFileRead;
895            }
896        }
897    } else {
898        result = FALSE;
899    }
900
901    if (file != NULL) {
902        fclose(file);
903    }
904
905    return result;
906}
907/*
908 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
909 */
910/* dirent also lists two entries: "." and ".." that we can safely ignore. */
911#define SKIP1 "."
912#define SKIP2 ".."
913static char SEARCH_TZFILE_RESULT[MAX_PATH_SIZE] = "";
914static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
915    char curpath[MAX_PATH_SIZE];
916    DIR* dirp = opendir(path);
917    DIR* subDirp = NULL;
918    struct dirent* dirEntry = NULL;
919
920    char* result = NULL;
921    if (dirp == NULL) {
922        return result;
923    }
924
925    /* Save the current path */
926    uprv_memset(curpath, 0, MAX_PATH_SIZE);
927    uprv_strcpy(curpath, path);
928
929    /* Check each entry in the directory. */
930    while((dirEntry = readdir(dirp)) != NULL) {
931        const char* dirName = dirEntry->d_name;
932        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
933            /* Create a newpath with the new entry to test each entry in the directory. */
934            char newpath[MAX_PATH_SIZE];
935            uprv_strcpy(newpath, curpath);
936            uprv_strcat(newpath, dirName);
937
938            if ((subDirp = opendir(newpath)) != NULL) {
939                /* If this new path is a directory, make a recursive call with the newpath. */
940                closedir(subDirp);
941                uprv_strcat(newpath, "/");
942                result = searchForTZFile(newpath, tzInfo);
943                /*
944                 Have to get out here. Otherwise, we'd keep looking
945                 and return the first match in the top-level directory
946                 if there's a match in the top-level. If not, this function
947                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
948                 It worked without this in most cases because we have a fallback of calling
949                 localtime_r to figure out the default timezone.
950                */
951                if (result != NULL)
952                    break;
953            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
954                if(compareBinaryFiles(TZDEFAULT, newpath, tzInfo)) {
955                    const char* zoneid = newpath + (sizeof(TZZONEINFO)) - 1;
956                    skipZoneIDPrefix(&zoneid);
957                    uprv_strcpy(SEARCH_TZFILE_RESULT, zoneid);
958                    result = SEARCH_TZFILE_RESULT;
959                    /* Get out after the first one found. */
960                    break;
961                }
962            }
963        }
964    }
965    closedir(dirp);
966    return result;
967}
968#endif
969U_CAPI const char* U_EXPORT2
970uprv_tzname(int n)
971{
972    const char *tzid = NULL;
973#ifdef U_WINDOWS
974    tzid = uprv_detectWindowsTimeZone();
975
976    if (tzid != NULL) {
977        return tzid;
978    }
979#else
980
981/*#if defined(U_DARWIN)
982    int ret;
983
984    tzid = getenv("TZFILE");
985    if (tzid != NULL) {
986        return tzid;
987    }
988#endif*/
989
990/* This code can be temporarily disabled to test tzname resolution later on. */
991#ifndef DEBUG_TZNAME
992    tzid = getenv("TZ");
993    if (tzid != NULL && isValidOlsonID(tzid))
994    {
995        /* This might be a good Olson ID. */
996        skipZoneIDPrefix(&tzid);
997        return tzid;
998    }
999    /* else U_TZNAME will give a better result. */
1000#endif
1001
1002#if defined(CHECK_LOCALTIME_LINK)
1003    /* Caller must handle threading issues */
1004    if (gTimeZoneBufferPtr == NULL) {
1005        /*
1006        This is a trick to look at the name of the link to get the Olson ID
1007        because the tzfile contents is underspecified.
1008        This isn't guaranteed to work because it may not be a symlink.
1009        */
1010        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer));
1011        if (0 < ret) {
1012            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
1013            gTimeZoneBuffer[ret] = 0;
1014            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
1015                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
1016            {
1017                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
1018            }
1019        } else {
1020#if defined(SEARCH_TZFILE)
1021            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1022            if (tzInfo != NULL) {
1023                tzInfo->defaultTZBuffer = NULL;
1024                tzInfo->defaultTZFileSize = 0;
1025                tzInfo->defaultTZFilePtr = NULL;
1026                tzInfo->defaultTZstatus = FALSE;
1027                tzInfo->defaultTZPosition = 0;
1028
1029                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1030
1031                /* Free previously allocated memory */
1032                if (tzInfo->defaultTZBuffer != NULL) {
1033                    uprv_free(tzInfo->defaultTZBuffer);
1034                }
1035                if (tzInfo->defaultTZFilePtr != NULL) {
1036                    fclose(tzInfo->defaultTZFilePtr);
1037                }
1038                uprv_free(tzInfo);
1039            }
1040
1041            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1042                return gTimeZoneBufferPtr;
1043            }
1044#endif
1045        }
1046    }
1047    else {
1048        return gTimeZoneBufferPtr;
1049    }
1050#endif
1051#endif
1052
1053#ifdef U_TZNAME
1054#ifdef U_WINDOWS
1055    /* The return value is free'd in timezone.cpp on Windows because
1056     * the other code path returns a pointer to a heap location. */
1057    return uprv_strdup(U_TZNAME[n]);
1058#else
1059    /*
1060    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1061    So we remap the abbreviation to an olson ID.
1062
1063    Since Windows exposes a little more timezone information,
1064    we normally don't use this code on Windows because
1065    uprv_detectWindowsTimeZone should have already given the correct answer.
1066    */
1067    {
1068        struct tm juneSol, decemberSol;
1069        int daylightType;
1070        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1071        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1072
1073        /* This probing will tell us when daylight savings occurs.  */
1074        localtime_r(&juneSolstice, &juneSol);
1075        localtime_r(&decemberSolstice, &decemberSol);
1076        daylightType = ((decemberSol.tm_isdst > 0) << 1) | (juneSol.tm_isdst > 0);
1077        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1078        if (tzid != NULL) {
1079            return tzid;
1080        }
1081    }
1082    return U_TZNAME[n];
1083#endif
1084#else
1085    return "";
1086#endif
1087}
1088
1089/* Get and set the ICU data directory --------------------------------------- */
1090
1091static char *gDataDirectory = NULL;
1092#if U_POSIX_LOCALE
1093 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
1094#endif
1095
1096static UBool U_CALLCONV putil_cleanup(void)
1097{
1098    if (gDataDirectory && *gDataDirectory) {
1099        uprv_free(gDataDirectory);
1100    }
1101    gDataDirectory = NULL;
1102#if U_POSIX_LOCALE
1103    if (gCorrectedPOSIXLocale) {
1104        uprv_free(gCorrectedPOSIXLocale);
1105        gCorrectedPOSIXLocale = NULL;
1106    }
1107#endif
1108    return TRUE;
1109}
1110
1111/*
1112 * Set the data directory.
1113 *    Make a copy of the passed string, and set the global data dir to point to it.
1114 *    TODO:  see bug #2849, regarding thread safety.
1115 */
1116U_CAPI void U_EXPORT2
1117u_setDataDirectory(const char *directory) {
1118    char *newDataDir;
1119    int32_t length;
1120
1121    if(directory==NULL || *directory==0) {
1122        /* A small optimization to prevent the malloc and copy when the
1123        shared library is used, and this is a way to make sure that NULL
1124        is never returned.
1125        */
1126        newDataDir = (char *)"";
1127    }
1128    else {
1129        length=(int32_t)uprv_strlen(directory);
1130        newDataDir = (char *)uprv_malloc(length + 2);
1131        /* Exit out if newDataDir could not be created. */
1132        if (newDataDir == NULL) {
1133            return;
1134        }
1135        uprv_strcpy(newDataDir, directory);
1136
1137#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1138        {
1139            char *p;
1140            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1141                *p = U_FILE_SEP_CHAR;
1142            }
1143        }
1144#endif
1145    }
1146
1147    umtx_lock(NULL);
1148    if (gDataDirectory && *gDataDirectory) {
1149        uprv_free(gDataDirectory);
1150    }
1151    gDataDirectory = newDataDir;
1152    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1153    umtx_unlock(NULL);
1154}
1155
1156U_CAPI UBool U_EXPORT2
1157uprv_pathIsAbsolute(const char *path)
1158{
1159  if(!path || !*path) {
1160    return FALSE;
1161  }
1162
1163  if(*path == U_FILE_SEP_CHAR) {
1164    return TRUE;
1165  }
1166
1167#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1168  if(*path == U_FILE_ALT_SEP_CHAR) {
1169    return TRUE;
1170  }
1171#endif
1172
1173#if defined(U_WINDOWS)
1174  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1175       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1176      path[1] == ':' ) {
1177    return TRUE;
1178  }
1179#endif
1180
1181  return FALSE;
1182}
1183
1184/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1185   until some client wrapper makefiles are updated */
1186#if defined(U_DARWIN) && TARGET_IPHONE_SIMULATOR
1187# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1188#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1189# endif
1190#endif
1191
1192U_CAPI const char * U_EXPORT2
1193u_getDataDirectory(void) {
1194    const char *path = NULL;
1195#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1196    char datadir_path_buffer[PATH_MAX];
1197#endif
1198
1199    /* if we have the directory, then return it immediately */
1200    UMTX_CHECK(NULL, gDataDirectory, path);
1201
1202    if(path) {
1203        return path;
1204    }
1205
1206    /*
1207    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1208    override ICU's data with the ICU_DATA environment variable. This prevents
1209    problems where multiple custom copies of ICU's specific version of data
1210    are installed on a system. Either the application must define the data
1211    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1212    ICU, set the data with udata_setCommonData or trust that all of the
1213    required data is contained in ICU's data library that contains
1214    the entry point defined by U_ICUDATA_ENTRY_POINT.
1215
1216    There may also be some platforms where environment variables
1217    are not allowed.
1218    */
1219#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1220    /* First try to get the environment variable */
1221    path=getenv("ICU_DATA");
1222#   endif
1223
1224    /* ICU_DATA_DIR may be set as a compile option.
1225     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1226     * and is used only when data is built in archive mode eliminating the need
1227     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1228     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1229     * set their own path.
1230     */
1231#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1232    if(path==NULL || *path==0) {
1233# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1234        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1235# endif
1236# ifdef ICU_DATA_DIR
1237        path=ICU_DATA_DIR;
1238# else
1239        path=U_ICU_DATA_DEFAULT_DIR;
1240# endif
1241# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1242        if (prefix != NULL) {
1243            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1244            path=datadir_path_buffer;
1245        }
1246# endif
1247    }
1248#endif
1249
1250    if(path==NULL) {
1251        /* It looks really bad, set it to something. */
1252        path = "";
1253    }
1254
1255    u_setDataDirectory(path);
1256    return gDataDirectory;
1257}
1258
1259
1260
1261
1262
1263/* Macintosh-specific locale information ------------------------------------ */
1264#ifdef XP_MAC
1265
/*
 * One row of the Mac locale lookup table. A row matches when each of its
 * four keys is either MAC_LC_MAGIC_NUMBER (wildcard) or equal to the
 * corresponding system value; the first matching row supplies posixID.
 */
typedef struct {
    int32_t script;
    int32_t region;
    int32_t lang;
    int32_t date_region;
    const char* posixID;
} mac_lc_rec;

/* Todo: This will be updated with a newer version from www.unicode.org web
   page when it's available.*/
#define MAC_LC_MAGIC_NUMBER -5  /* wildcard: the field matches any value */
#define MAC_LC_INIT_NUMBER -9   /* initial value of a system key not yet read */

/*
 * Rows are ordered by date_region code. Codes without a POSIX mapping are
 * listed in comments so the gaps stay visible. The last row is an
 * all-wildcard fallback and must remain last.
 */
static const mac_lc_rec mac_lc_recs[] = {
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  0, "en_US" }, /* United States */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  1, "fr_FR" }, /* France */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  2, "en_GB" }, /* Great Britain */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  3, "de_DE" }, /* Germany */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  4, "it_IT" }, /* Italy */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  5, "nl_NL" }, /* Netherlands */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  6, "fr_BE" }, /* French for Belgium or Luxembourg */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  7, "sv_SE" }, /* Sweden */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,  9, "da_DK" }, /* Denmark */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 10, "pt_PT" }, /* Portugal */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 11, "fr_CA" }, /* French Canada */
    /* Israel: previously mapped to "is_IS", which is Icelandic (see code 21). */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 13, "he_IL" }, /* Israel */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 14, "ja_JP" }, /* Japan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 15, "en_AU" }, /* Australia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 16, "ar_AE" }, /* the Arabic world (?) */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 17, "fi_FI" }, /* Finland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 18, "fr_CH" }, /* French for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 19, "de_CH" }, /* German for Switzerland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 20, "el_GR" }, /* Greece */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 21, "is_IS" }, /* Iceland */
    /* 22 (Malta) and 23 (Cyprus): no mapping yet. */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 24, "tr_TR" }, /* Turkey */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 25, "sh_YU" }, /* Croatian system for Yugoslavia */
    /* 33 (Hindi system for India) and 34 (Pakistan): no mapping yet. */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 41, "lt_LT" }, /* Lithuania */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 42, "pl_PL" }, /* Poland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 43, "hu_HU" }, /* Hungary */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 44, "et_EE" }, /* Estonia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 45, "lv_LV" }, /* Latvia */
    /* 46 (Lapland  [Ask Rich for the data. HS]) and 47 (Faeroe Islands): no mapping yet. */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 48, "fa_IR" }, /* Iran */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 49, "ru_RU" }, /* Russia */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 50, "en_IE" }, /* Ireland */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 51, "ko_KR" }, /* Korea */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 52, "zh_CN" }, /* People's Republic of China */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 53, "zh_TW" }, /* Taiwan */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, 54, "th_TH" }, /* Thailand */

    /* fallback is en_US */
    { MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER, MAC_LC_MAGIC_NUMBER,
      MAC_LC_MAGIC_NUMBER, "en_US" }
};
1365
1366#endif
1367
1368#if U_POSIX_LOCALE
/* Shared helper for uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage: yields the POSIX locale ID for the
 * given category. Only LC_CTYPE and LC_MESSAGES are looked up; any other
 * category, and any lookup that ends in nothing, "C" or "POSIX", produces
 * "en_US_POSIX".
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char *id = NULL;

    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * setlocale is asked first because an application may have set the
        * locale itself. On Solaris two calls to setlocale can give different
        * answers, so callers fetch this value only once.
        *
        * LC_ALL is not queried through setlocale because the result is
        * platform dependent; by default the LANG environment variable
        * appears to drive LC_CTYPE. Examples of setlocale(LC_ALL, NULL):
        *   HPUX:    'C C C C C C C'
        *   Solaris: /en_US/C/C/C/C/C (on the second try)
        *   Linux:   LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * Default codepage detection needs LC_CTYPE as well.
        *
        * Never call setlocale(LC_*, "") here: passing an empty string
        * instead of NULL changes the libc's behavior.
        */
        id = setlocale(category, NULL);
        if (id == NULL
            || uprv_strcmp("C", id) == 0
            || uprv_strcmp("POSIX", id) == 0)
        {
            /* Possibly garbage; consult the environment in precedence order. */
            const char *categoryVar = (category == LC_MESSAGES) ? "LC_MESSAGES" : "LC_CTYPE";

            id = getenv("LC_ALL");
            if (id == NULL) {
                id = getenv(categoryVar);
            }
            if (id == NULL) {
                id = getenv("LANG");
            }
        }
    }

    if (id == NULL
        || uprv_strcmp("C", id) == 0
        || uprv_strcmp("POSIX", id) == 0)
    {
        /* Nothing usable was found; hand back a sensible POSIX default. */
        return "en_US_POSIX";
    }
    return id;
}
1419
/* POSIX ID of the default locale, exactly as found. It is taken from
 * LC_MESSAGES (and indirectly LC_ALL and LANG) on the first call and the
 * same pointer is returned on later calls.
 */
static const char *uprv_getPOSIXIDForDefaultLocale(void)
{
    static const char *cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
    return cachedID;
}
1431
/* POSIX ID used for default codepage detection, exactly as found. It is
 * taken from LC_CTYPE (and indirectly LC_ALL and LANG) on the first call and
 * the same pointer is returned on later calls.
 */
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
{
    static const char *cachedID = NULL;

    if (cachedID != NULL) {
        return cachedID;
    }
    cachedID = uprv_getPOSIXIDForCategory(LC_CTYPE);
    return cachedID;
}
1443#endif
1444
1445/* NOTE: The caller should handle thread safety */
1446U_CAPI const char* U_EXPORT2
1447uprv_getDefaultLocaleID()
1448{
1449#if U_POSIX_LOCALE
1450/*
1451  Note that:  (a '!' means the ID is improper somehow)
1452     LC_ALL  ---->     default_loc          codepage
1453--------------------------------------------------------
1454     ab.CD             ab                   CD
1455     ab@CD             ab__CD               -
1456     ab@CD.EF          ab__CD               EF
1457
1458     ab_CD.EF@GH       ab_CD_GH             EF
1459
1460Some 'improper' ways to do the same as above:
1461  !  ab_CD@GH.EF       ab_CD_GH             EF
1462  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1463  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1464
1465     _CD@GH            _CD_GH               -
1466     _CD.EF@GH         _CD_GH               EF
1467
1468The variant cannot have dots in it.
1469The 'rightmost' variant (@xxx) wins.
1470The leftmost codepage (.xxx) wins.
1471*/
1472    char *correctedPOSIXLocale = 0;
1473    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1474    const char *p;
1475    const char *q;
1476    int32_t len;
1477
1478    /* Format: (no spaces)
1479    ll [ _CC ] [ . MM ] [ @ VV]
1480
1481      l = lang, C = ctry, M = charmap, V = variant
1482    */
1483
1484    if (gCorrectedPOSIXLocale != NULL) {
1485        return gCorrectedPOSIXLocale;
1486    }
1487
1488    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1489        /* assume new locale can't be larger than old one? */
1490        correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1491        /* Exit on memory allocation error. */
1492        if (correctedPOSIXLocale == NULL) {
1493            return NULL;
1494        }
1495        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1496        correctedPOSIXLocale[p-posixID] = 0;
1497
1498        /* do not copy after the @ */
1499        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1500            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1501        }
1502    }
1503
1504    /* Note that we scan the *uncorrected* ID. */
1505    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1506        if (correctedPOSIXLocale == NULL) {
1507            correctedPOSIXLocale = uprv_malloc(uprv_strlen(posixID)+1);
1508            /* Exit on memory allocation error. */
1509            if (correctedPOSIXLocale == NULL) {
1510                return NULL;
1511            }
1512            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1513            correctedPOSIXLocale[p-posixID] = 0;
1514        }
1515        p++;
1516
1517        /* Take care of any special cases here.. */
1518        if (!uprv_strcmp(p, "nynorsk")) {
1519            p = "NY";
1520            /* Don't worry about no__NY. In practice, it won't appear. */
1521        }
1522
1523        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1524            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1525        }
1526        else {
1527            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1528        }
1529
1530        if ((q = uprv_strchr(p, '.')) != NULL) {
1531            /* How big will the resulting string be? */
1532            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1533            uprv_strncat(correctedPOSIXLocale, p, q-p);
1534            correctedPOSIXLocale[len] = 0;
1535        }
1536        else {
1537            /* Anything following the @ sign */
1538            uprv_strcat(correctedPOSIXLocale, p);
1539        }
1540
1541        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1542         * How about 'russian' -> 'ru'?
1543         * Many of the other locales using ISO codes will be handled by the
1544         * canonicalization functions in uloc_getDefault.
1545         */
1546    }
1547
1548    /* Was a correction made? */
1549    if (correctedPOSIXLocale != NULL) {
1550        posixID = correctedPOSIXLocale;
1551    }
1552    else {
1553        /* copy it, just in case the original pointer goes away.  See j2395 */
1554        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1555        /* Exit on memory allocation error. */
1556        if (correctedPOSIXLocale == NULL) {
1557            return NULL;
1558        }
1559        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1560    }
1561
1562    if (gCorrectedPOSIXLocale == NULL) {
1563        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1564        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1565        correctedPOSIXLocale = NULL;
1566    }
1567
1568    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1569        uprv_free(correctedPOSIXLocale);
1570    }
1571
1572    return posixID;
1573
1574#elif defined(U_WINDOWS)
1575    UErrorCode status = U_ZERO_ERROR;
1576    LCID id = GetThreadLocale();
1577    const char* locID = uprv_convertToPosix(id, &status);
1578
1579    if (U_FAILURE(status)) {
1580        locID = "en_US";
1581    }
1582    return locID;
1583
1584#elif defined(XP_MAC)
1585    int32_t script = MAC_LC_INIT_NUMBER;
1586    /* = IntlScript(); or GetScriptManagerVariable(smSysScript);*/
1587    int32_t region = MAC_LC_INIT_NUMBER;
1588    /* = GetScriptManagerVariable(smRegionCode);*/
1589    int32_t lang = MAC_LC_INIT_NUMBER;
1590    /* = GetScriptManagerVariable(smScriptLang);*/
1591    int32_t date_region = MAC_LC_INIT_NUMBER;
1592    const char* posixID = 0;
1593    int32_t count = sizeof(mac_lc_recs) / sizeof(mac_lc_rec);
1594    int32_t i;
1595    Intl1Hndl ih;
1596
1597    ih = (Intl1Hndl) GetIntlResource(1);
1598    if (ih)
1599        date_region = ((uint16_t)(*ih)->intl1Vers) >> 8;
1600
1601    for (i = 0; i < count; i++) {
1602        if (   ((mac_lc_recs[i].script == MAC_LC_MAGIC_NUMBER)
1603             || (mac_lc_recs[i].script == script))
1604            && ((mac_lc_recs[i].region == MAC_LC_MAGIC_NUMBER)
1605             || (mac_lc_recs[i].region == region))
1606            && ((mac_lc_recs[i].lang == MAC_LC_MAGIC_NUMBER)
1607             || (mac_lc_recs[i].lang == lang))
1608            && ((mac_lc_recs[i].date_region == MAC_LC_MAGIC_NUMBER)
1609             || (mac_lc_recs[i].date_region == date_region))
1610            )
1611        {
1612            posixID = mac_lc_recs[i].posixID;
1613            break;
1614        }
1615    }
1616
1617    return posixID;
1618
1619#elif defined(OS400)
1620    /* locales are process scoped and are by definition thread safe */
1621    static char correctedLocale[64];
1622    const  char *localeID = getenv("LC_ALL");
1623           char *p;
1624
1625    if (localeID == NULL)
1626        localeID = getenv("LANG");
1627    if (localeID == NULL)
1628        localeID = setlocale(LC_ALL, NULL);
1629    /* Make sure we have something... */
1630    if (localeID == NULL)
1631        return "en_US_POSIX";
1632
1633    /* Extract the locale name from the path. */
1634    if((p = uprv_strrchr(localeID, '/')) != NULL)
1635    {
1636        /* Increment p to start of locale name. */
1637        p++;
1638        localeID = p;
1639    }
1640
1641    /* Copy to work location. */
1642    uprv_strcpy(correctedLocale, localeID);
1643
1644    /* Strip off the '.locale' extension. */
1645    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1646        *p = 0;
1647    }
1648
1649    /* Upper case the locale name. */
1650    T_CString_toUpperCase(correctedLocale);
1651
1652    /* See if we are using the POSIX locale.  Any of the
1653    * following are equivalent and use the same QLGPGCMA
1654    * (POSIX) locale.
1655    * QLGPGCMA2 means UCS2
1656    * QLGPGCMA_4 means UTF-32
1657    * QLGPGCMA_8 means UTF-8
1658    */
1659    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1660        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1661        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1662    {
1663        uprv_strcpy(correctedLocale, "en_US_POSIX");
1664    }
1665    else
1666    {
1667        int16_t LocaleLen;
1668
1669        /* Lower case the lang portion. */
1670        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1671        {
1672            *p = uprv_tolower(*p);
1673        }
1674
1675        /* Adjust for Euro.  After '_E' add 'URO'. */
1676        LocaleLen = uprv_strlen(correctedLocale);
1677        if (correctedLocale[LocaleLen - 2] == '_' &&
1678            correctedLocale[LocaleLen - 1] == 'E')
1679        {
1680            uprv_strcat(correctedLocale, "URO");
1681        }
1682
1683        /* If using Lotus-based locale then convert to
1684         * equivalent non Lotus.
1685         */
1686        else if (correctedLocale[LocaleLen - 2] == '_' &&
1687            correctedLocale[LocaleLen - 1] == 'L')
1688        {
1689            correctedLocale[LocaleLen - 2] = 0;
1690        }
1691
1692        /* There are separate simplified and traditional
1693         * locales called zh_HK_S and zh_HK_T.
1694         */
1695        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1696        {
1697            uprv_strcpy(correctedLocale, "zh_HK");
1698        }
1699
1700        /* A special zh_CN_GBK locale...
1701        */
1702        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1703        {
1704            uprv_strcpy(correctedLocale, "zh_CN");
1705        }
1706
1707    }
1708
1709    return correctedLocale;
1710#endif
1711
1712}
1713
1714#if !U_CHARSET_IS_UTF8
1715#if U_POSIX_LOCALE
1716/*
1717Due to various platform differences, one platform may specify a charset,
1718when they really mean a different charset. Remap the names so that they are
1719compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1720here. Before adding anything to this function, please consider adding unique
1721names to the ICU alias table in the data directory.
1722*/
/*
 * Maps a codepage name reported by the platform onto the name ICU should use.
 *
 * locale  POSIX locale ID the name belongs to. NULL and "" are treated the
 *         same way ("no locale").
 * name    Codepage name reported by the platform; may be NULL.
 *
 * Returns NULL when name is NULL or empty. Otherwise returns either name
 * itself or a string literal holding the replacement chosen by the rules of
 * the platform this file is compiled for.
 */
static const char*
remapPlatformDependentCodepage(const char *locale, const char *name) {
    const char *remapped = name;

    if (name == NULL) {
        return NULL;
    }
    if (locale != NULL && locale[0] == 0) {
        /* An empty locale string counts as "no locale". */
        locale = NULL;
    }
#if defined(U_AIX)
    if (!uprv_strcmp(name, "IBM-943")) {
        /* Pick the ASCII compatible ibm-943. */
        remapped = "Shift-JIS";
    }
    else if (!uprv_strcmp(name, "IBM-1252")) {
        /* Pick the windows-1252 variant that contains the Euro. */
        remapped = "IBM-5348";
    }
#elif defined(U_SOLARIS)
    if (locale != NULL && !uprv_strcmp(name, "EUC")) {
        /* "EUC" alone is underspecified on Solaris; the locale disambiguates it. */
        if (!uprv_strcmp(locale, "zh_CN")) {
            remapped = "EUC-CN";
        }
        else if (!uprv_strcmp(locale, "zh_TW")) {
            remapped = "EUC-TW";
        }
        else if (!uprv_strcmp(locale, "ko_KR")) {
            remapped = "EUC-KR";
        }
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
         * ibm-954 is the best match.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        remapped = "eucjis";
    }
    else if (!uprv_strcmp(name, "646")) {
        /*
         * Solaris reports 646 as the default codepage, but its C library
         * routines treat it as ISO-8859-1 rather than US-ASCII (646).
         */
        remapped = "ISO-8859-1";
    }
#elif defined(U_DARWIN)
    if (locale == NULL && *name == 0) {
        /*
         * Neither a locale nor a name was supplied, which usually means that
         * nl_langinfo did not return valid information.
         * Mac OS X uses UTF-8 by default (especially the locale data and console).
         */
        remapped = "UTF-8";
    }
    else if (!uprv_strcmp(name, "CP949")) {
        /* Use a similar codepage to avoid issues with backslash and won symbol. */
        remapped = "EUC-KR";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && !uprv_strcmp(name, "US-ASCII")) {
        /* Outside the C/POSIX locale, prefer UTF-8 over US-ASCII. */
        remapped = "UTF-8";
    }
#elif defined(U_BSD)
    if (!uprv_strcmp(name, "CP949")) {
        /* Use a similar codepage to avoid issues with backslash and won symbol. */
        remapped = "EUC-KR";
    }
#elif defined(U_HPUX)
    if (locale != NULL && !uprv_strcmp(locale, "zh_HK") && !uprv_strcmp(name, "big5")) {
        /* HP extended big5 as hkbig5 even though the two are not compatible. */
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
        remapped = "hkbig5";
    }
    else if (!uprv_strcmp(name, "eucJP")) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        remapped = "eucjis";
    }
#elif defined(U_LINUX)
    if (locale != NULL && !uprv_strcmp(name, "euc")) {
        /* "euc" alone is underspecified on Linux; the locale disambiguates it. */
        if (!uprv_strcmp(locale, "korean")) {
            remapped = "EUC-KR";
        }
        else if (!uprv_strcmp(locale, "japanese")) {
            /* Same choice as for "eucjp" in the next branch. */
            remapped = "eucjis";
        }
    }
    else if (!uprv_strcmp(name, "eucjp")) {
        /*
         * ibm-1350 is the best match, but unavailable.
         * ibm-954 is mostly a superset of ibm-1350.
         * ibm-33722 is the default for eucJP (similar to Windows).
         */
        remapped = "eucjis";
    }
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
            (!uprv_strcmp(name, "ANSI_X3.4-1968") || !uprv_strcmp(name, "US-ASCII"))) {
        /* Outside the C/POSIX locale, prefer UTF-8 over US-ASCII. */
        remapped = "UTF-8";
    }
    /*
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
     * it by falling back to 'US-ASCII' when NULL is returned from this
     * function. So, we don't have to worry about it here.
     */
#endif
    /* An empty name is reported to the caller as NULL. */
    return (*remapped == 0) ? NULL : remapped;
}
1844
1845static const char*
1846getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1847{
1848    char localeBuf[100];
1849    const char *name = NULL;
1850    char *variant = NULL;
1851
1852    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1853        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1854        uprv_strncpy(localeBuf, localeName, localeCapacity);
1855        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1856        name = uprv_strncpy(buffer, name+1, buffCapacity);
1857        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1858        if ((variant = (uprv_strchr(name, '@'))) != NULL) {
1859            *variant = 0;
1860        }
1861        name = remapPlatformDependentCodepage(localeBuf, name);
1862    }
1863    return name;
1864}
1865#endif
1866
/*
 * Determines the name of the platform's default codepage.
 *
 * Exactly one of the following branches is compiled in:
 *  - OS400:          "ibm-<ccsid>", with the CCSID taken from the job
 *                    information returned by QUSRJOBI (37 if unavailable).
 *  - OS390:          nl_langinfo(CODESET) followed by
 *                    UCNV_SWAP_LFNL_OPTION_STRING.
 *  - XP_MAC:         "macintosh".
 *  - U_WINDOWS:      "windows-<GetACP()>".
 *  - U_POSIX_LOCALE: nl_langinfo (when available), then the codepage part of
 *                    the POSIX locale ID, then "US-ASCII".
 *  - otherwise:      "US-ASCII".
 *
 * Returns a non-NULL pointer to a function-static buffer or to a string
 * literal; the caller must not modify or free it.
 */
static const char*
int_getDefaultCodepage(void)
{
#if defined(OS400)
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    /* Cast so that the argument type matches the %d conversion. */
    sprintf(codepage,"ibm-%d", (int)ccsid);
    return codepage;

#elif defined(OS390)
    static char codepage[64];
    /* Room left for the codeset name once the option string and NUL are reserved. */
    const size_t nameCapacity = 63-strlen(UCNV_SWAP_LFNL_OPTION_STRING);

    strncpy(codepage, nl_langinfo(CODESET), nameCapacity);
    /* strncpy does not terminate a truncated name; strcat needs a terminator. */
    codepage[nameCapacity] = 0;
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NULL terminate */

    return codepage;

#elif defined(XP_MAC)
    return "macintosh"; /* TODO: Macintosh Roman. There must be a better way. fixme! */

#elif defined(U_WINDOWS)
    static char codepage[64];
    /* Cast so that the argument type matches the %d conversion. */
    sprintf(codepage, "windows-%d", (int)GetACP());
    return codepage;

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
#if U_HAVE_NL_LANGINFO_CODESET
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
        /* Locale handed to the remapping; NULL means "no locale". */
        const char *remapLocale = NULL;
#if defined(U_DARWIN) || defined(U_LINUX)
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII. A NULL locale ID is handled like the C/POSIX case.
         */
        if (localeName != NULL && uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            remapLocale = localeName;
        }
#endif
        codeset = remapPlatformDependentCodepage(remapLocale, codeset);

        if (codeset != NULL) {
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    return "US-ASCII";
#endif
}
1962
1963
1964U_CAPI const char*  U_EXPORT2
1965uprv_getDefaultCodepage()
1966{
1967    static char const  *name = NULL;
1968    umtx_lock(NULL);
1969    if (name == NULL) {
1970        name = int_getDefaultCodepage();
1971    }
1972    umtx_unlock(NULL);
1973    return name;
1974}
1975#endif  /* !U_CHARSET_IS_UTF8 */
1976
1977
1978/* end of platform-specific implementation -------------- */
1979
1980/* version handling --------------------------------------------------------- */
1981
1982U_CAPI void U_EXPORT2
1983u_versionFromString(UVersionInfo versionArray, const char *versionString) {
1984    char *end;
1985    uint16_t part=0;
1986
1987    if(versionArray==NULL) {
1988        return;
1989    }
1990
1991    if(versionString!=NULL) {
1992        for(;;) {
1993            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
1994            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
1995                break;
1996            }
1997            versionString=end+1;
1998        }
1999    }
2000
2001    while(part<U_MAX_VERSION_LENGTH) {
2002        versionArray[part++]=0;
2003    }
2004}
2005
2006U_CAPI void U_EXPORT2
2007u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2008    if(versionArray!=NULL && versionString!=NULL) {
2009        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2010        int32_t len = u_strlen(versionString);
2011        if(len>U_MAX_VERSION_STRING_LENGTH) {
2012            len = U_MAX_VERSION_STRING_LENGTH;
2013        }
2014        u_UCharsToChars(versionString, versionChars, len);
2015        versionChars[len]=0;
2016        u_versionFromString(versionArray, versionChars);
2017    }
2018}
2019
2020U_CAPI void U_EXPORT2
2021u_versionToString(UVersionInfo versionArray, char *versionString) {
2022    uint16_t count, part;
2023    uint8_t field;
2024
2025    if(versionString==NULL) {
2026        return;
2027    }
2028
2029    if(versionArray==NULL) {
2030        versionString[0]=0;
2031        return;
2032    }
2033
2034    /* count how many fields need to be written */
2035    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2036    }
2037
2038    if(count <= 1) {
2039        count = 2;
2040    }
2041
2042    /* write the first part */
2043    /* write the decimal field value */
2044    field=versionArray[0];
2045    if(field>=100) {
2046        *versionString++=(char)('0'+field/100);
2047        field%=100;
2048    }
2049    if(field>=10) {
2050        *versionString++=(char)('0'+field/10);
2051        field%=10;
2052    }
2053    *versionString++=(char)('0'+field);
2054
2055    /* write the following parts */
2056    for(part=1; part<count; ++part) {
2057        /* write a dot first */
2058        *versionString++=U_VERSION_DELIMITER;
2059
2060        /* write the decimal field value */
2061        field=versionArray[part];
2062        if(field>=100) {
2063            *versionString++=(char)('0'+field/100);
2064            field%=100;
2065        }
2066        if(field>=10) {
2067            *versionString++=(char)('0'+field/10);
2068            field%=10;
2069        }
2070        *versionString++=(char)('0'+field);
2071    }
2072
2073    /* NUL-terminate */
2074    *versionString=0;
2075}
2076
2077U_CAPI void U_EXPORT2
2078u_getVersion(UVersionInfo versionArray) {
2079    u_versionFromString(versionArray, U_ICU_VERSION);
2080}
2081
2082/**
2083 * icucfg.h dependent code
2084 */
2085
2086#if U_ENABLE_DYLOAD
2087
2088#if defined(U_CHECK_DYLOAD)
2089
2090#if defined(HAVE_DLOPEN)
2091
2092#ifdef HAVE_DLFCN_H
2093#ifdef __MVS__
2094#ifndef __SUSV3
2095#define __SUSV3 1
2096#endif
2097#endif
2098#include <dlfcn.h>
2099#endif
2100
2101U_INTERNAL void * U_EXPORT2
2102uprv_dl_open(const char *libName, UErrorCode *status) {
2103  void *ret = NULL;
2104  if(U_FAILURE(*status)) return ret;
2105  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2106  if(ret==NULL) {
2107#ifndef U_TRACE_DYLOAD
2108    perror("dlopen");
2109#endif
2110    *status = U_MISSING_RESOURCE_ERROR;
2111  }
2112  return ret;
2113}
2114
2115U_INTERNAL void U_EXPORT2
2116uprv_dl_close(void *lib, UErrorCode *status) {
2117  if(U_FAILURE(*status)) return;
2118  dlclose(lib);
2119}
2120
2121U_INTERNAL void* U_EXPORT2
2122uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2123  void *ret = NULL;
2124  if(U_FAILURE(*status)) return ret;
2125  ret = dlsym(lib, sym);
2126  if(ret == NULL) {
2127    *status = U_MISSING_RESOURCE_ERROR;
2128  }
2129  return ret;
2130}
2131
2132#else
2133
2134/* null (nonexistent) implementation. */
2135
2136U_INTERNAL void * U_EXPORT2
2137uprv_dl_open(const char *libName, UErrorCode *status) {
2138  if(U_FAILURE(*status)) return NULL;
2139  *status = U_UNSUPPORTED_ERROR;
2140  return NULL;
2141}
2142
2143U_INTERNAL void U_EXPORT2
2144uprv_dl_close(void *lib, UErrorCode *status) {
2145  if(U_FAILURE(*status)) return;
2146  *status = U_UNSUPPORTED_ERROR;
2147  return;
2148}
2149
2150
2151U_INTERNAL void* U_EXPORT2
2152uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2153  if(U_FAILURE(*status)) return NULL;
2154  *status = U_UNSUPPORTED_ERROR;
2155  return NULL;
2156}
2157
2158
2159
2160#endif
2161
2162#elif defined U_WINDOWS
2163
2164U_INTERNAL void * U_EXPORT2
2165uprv_dl_open(const char *libName, UErrorCode *status) {
2166  HMODULE lib = NULL;
2167
2168  if(U_FAILURE(*status)) return NULL;
2169
2170  lib = LoadLibraryA(libName);
2171
2172  if(lib==NULL) {
2173    *status = U_MISSING_RESOURCE_ERROR;
2174  }
2175
2176  return (void*)lib;
2177}
2178
2179U_INTERNAL void U_EXPORT2
2180uprv_dl_close(void *lib, UErrorCode *status) {
2181  HMODULE handle = (HMODULE)lib;
2182  if(U_FAILURE(*status)) return;
2183
2184  FreeLibrary(handle);
2185
2186  return;
2187}
2188
2189
2190U_INTERNAL void* U_EXPORT2
2191uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2192  HMODULE handle = (HMODULE)lib;
2193  void * addr = NULL;
2194
2195  if(U_FAILURE(*status) || lib==NULL) return NULL;
2196
2197  addr = GetProcAddress(handle, sym);
2198
2199  if(addr==NULL) {
2200    DWORD lastError = GetLastError();
2201    if(lastError == ERROR_PROC_NOT_FOUND) {
2202      *status = U_MISSING_RESOURCE_ERROR;
2203    } else {
2204      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2205    }
2206  }
2207
2208  return addr;
2209}
2210
2211
2212#else
2213
2214/* No dynamic loading set. */
2215
2216U_INTERNAL void * U_EXPORT2
2217uprv_dl_open(const char *libName, UErrorCode *status) {
2218    if(U_FAILURE(*status)) return NULL;
2219    *status = U_UNSUPPORTED_ERROR;
2220    return NULL;
2221}
2222
2223U_INTERNAL void U_EXPORT2
2224uprv_dl_close(void *lib, UErrorCode *status) {
2225    if(U_FAILURE(*status)) return;
2226    *status = U_UNSUPPORTED_ERROR;
2227    return;
2228}
2229
2230
2231U_INTERNAL void* U_EXPORT2
2232uprv_dl_sym(void *lib, const char* sym, UErrorCode *status) {
2233    if(U_FAILURE(*status)) return NULL;
2234    *status = U_UNSUPPORTED_ERROR;
2235    return NULL;
2236}
2237
2238
2239#endif
2240
2241#endif /* U_ENABLE_DYLOAD */
2242
2243/*
2244 * Hey, Emacs, please set the following:
2245 *
2246 * Local Variables:
2247 * indent-tabs-mode: nil
2248 * End:
2249 *
2250 */
2251