1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 1997-2016, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*
11*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13*   Date        Name        Description
14*   04/14/97    aliu        Creation.
15*   04/24/97    aliu        Added getDefaultDataDirectory() and
16*                            getDefaultLocaleID().
17*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18*                            for assumed case.  Non-UNIX platforms must be
19*                            special-cased.  Rewrote numeric methods dealing
20*                            with NaN and Infinity to be platform independent
21*                             over all IEEE 754 platforms.
22*   05/13/97    aliu        Restored sign of timezone
23*                            (semantics are hours West of GMT)
24*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25*                             nextDouble..
26*   07/22/98    stephen     Added remainder, max, min, trunc
27*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28*   08/24/98    stephen     Added longBitsFromDouble
29*   09/08/98    stephen     Minor changes for Mac Port
30*   03/02/99    stephen     Removed openFile().  Added AS400 support.
31*                            Fixed EBCDIC tables
32*   04/15/99    stephen     Converted to C.
33*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34*   08/04/99    jeffrey R.  Added OS/2 changes
35*   11/15/99    helena      Integrated S/390 IEEE support.
36*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38*   01/03/08    Steven L.   Fake Time Support
39******************************************************************************
40*/
41
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
45
46/* include ICU headers */
47#include "unicode/utypes.h"
48#include "unicode/putil.h"
49#include "unicode/ustring.h"
50#include "putilimp.h"
51#include "uassert.h"
52#include "umutex.h"
53#include "cmemory.h"
54#include "cstring.h"
55#include "locmap.h"
56#include "ucln_cmn.h"
57#include "charstr.h"
58
59/* Include standard headers. */
60#include <stdio.h>
61#include <stdlib.h>
62#include <string.h>
63#include <math.h>
64#include <locale.h>
65#include <float.h>
66
67#ifndef U_COMMON_IMPLEMENTATION
68#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
69#endif
70
71
72/* include system headers */
73#if U_PLATFORM_USES_ONLY_WIN32_API
74    /*
75     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
76     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
77     * to use native APIs as much as possible?
78     */
79#   define WIN32_LEAN_AND_MEAN
80#   define VC_EXTRALEAN
81#   define NOUSER
82#   define NOSERVICE
83#   define NOIME
84#   define NOMCX
85#   include <windows.h>
86#   include "wintz.h"
87#elif U_PLATFORM == U_PF_OS400
88#   include <float.h>
89#   include <qusec.h>       /* error code structure */
90#   include <qusrjobi.h>
91#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
92#   include <mih/testptr.h> /* For uprv_maximumPtr */
93#elif U_PLATFORM == U_PF_OS390
94#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
95#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
96#   include <limits.h>
97#   include <unistd.h>
98#   if U_PLATFORM == U_PF_SOLARIS
99#       ifndef _XPG4_2
100#           define _XPG4_2
101#       endif
102#   endif
103#elif U_PLATFORM == U_PF_QNX
104#   include <sys/neutrino.h>
105#endif
106
107#if (U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(__STRICT_ANSI__)
108/* tzset isn't defined in strict ANSI on Cygwin and MinGW. */
109#undef __STRICT_ANSI__
110#endif
111
112/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
114 */
115#include <time.h>
116
117#if !U_PLATFORM_USES_ONLY_WIN32_API
118#include <sys/time.h>
119#endif
120
121/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
124 *
125 */
126
127#if U_HAVE_NL_LANGINFO_CODESET
128#include <langinfo.h>
129#endif
130
131/**
132 * Simple things (presence of functions, etc) should just go in configure.in and be added to
133 * icucfg.h via autoheader.
134 */
135#if U_PLATFORM_IMPLEMENTS_POSIX
136#   if U_PLATFORM == U_PF_OS400
137#    define HAVE_DLFCN_H 0
138#    define HAVE_DLOPEN 0
139#   else
140#   ifndef HAVE_DLFCN_H
141#    define HAVE_DLFCN_H 1
142#   endif
143#   ifndef HAVE_DLOPEN
144#    define HAVE_DLOPEN 1
145#   endif
146#   endif
147#   ifndef HAVE_GETTIMEOFDAY
148#    define HAVE_GETTIMEOFDAY 1
149#   endif
150#else
151#   define HAVE_DLFCN_H 0
152#   define HAVE_DLOPEN 0
153#   define HAVE_GETTIMEOFDAY 0
154#endif
155
156U_NAMESPACE_USE
157
/* Define the extension for data files, again...
   The value carries no leading dot. */
#define DATA_TYPE "dat"

/* Leave this copyright notice here!
   The string is taken from U_COPYRIGHT_STRING and stored in this file's
   object code. */
static const char copyright[] = U_COPYRIGHT_STRING;
163
164/* floating point implementations ------------------------------------------- */
165
/* Mask for the sign bit in the most significant 32-bit word of a double.
   (The NaN constant defined below is a quiet NaN, not a signaling one.) */
#define SIGN 0x80000000U

/*
 * Overlay of a 64-bit integer and a double, used to build doubles from raw
 * bit patterns and to inspect the bits of a double.
 * i64 has to remain the first member: a brace initializer sets the first
 * member of a union (C89 rule), and the constants below rely on that.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Exponent bits all ones, top mantissa bit set. */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };
/* Exponent bits all ones, mantissa zero, sign clear. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
176
177/*---------------------------------------------------------------------------
178  Platform utilities
179  Our general strategy is to assume we're on a POSIX platform.  Platforms which
180  are non-POSIX must declare themselves so.  The default POSIX implementation
181  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
182  functions).
183  ---------------------------------------------------------------------------*/
184
185#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
186#   undef U_POSIX_LOCALE
187#else
188#   define U_POSIX_LOCALE    1
189#endif
190
191/*
192    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
193    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
194*/
195#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes, i.e. they end where the double ends.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char* firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
205
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they start at the first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char* firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
215#endif   /* !IEEE_754 */
216
217#if IEEE_754
218static UBool
219u_signBit(double d) {
220    uint8_t hiByte;
221#if U_IS_BIG_ENDIAN
222    hiByte = *(uint8_t *)&d;
223#else
224    hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
225#endif
226    return (hiByte & 0x80) != 0;
227}
228#endif
229
230
231
232#if defined (U_DEBUG_FAKETIME)
233/* Override the clock to test things without having to move the system clock.
234 * Assumes POSIX gettimeofday() will function
235 */
236UDate fakeClock_t0 = 0; /** Time to start the clock from **/
237UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
238UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
239static UMutex fakeClockMutex = U_MUTEX_INTIALIZER;
240
241static UDate getUTCtime_real() {
242    struct timeval posixTime;
243    gettimeofday(&posixTime, NULL);
244    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
245}
246
247static UDate getUTCtime_fake() {
248    umtx_lock(&fakeClockMutex);
249    if(!fakeClock_set) {
250        UDate real = getUTCtime_real();
251        const char *fake_start = getenv("U_FAKETIME_START");
252        if((fake_start!=NULL) && (fake_start[0]!=0)) {
253            sscanf(fake_start,"%lf",&fakeClock_t0);
254            fakeClock_dt = fakeClock_t0 - real;
255            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
256                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
257                    fakeClock_t0, fake_start, fakeClock_dt, real);
258        } else {
259          fakeClock_dt = 0;
260            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
261                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
262        }
263        fakeClock_set = TRUE;
264    }
265    umtx_unlock(&fakeClockMutex);
266
267    return getUTCtime_real() + fakeClock_dt;
268}
269#endif
270
271#if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Windows FILETIME with a 64-bit integer so that the value filled
   in through fileTime can be read back as a single number through int64. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
280
281#endif
282
283/*---------------------------------------------------------------------------
284  Universal Implementations
285  These are designed to work on all platforms.  Try these, and if they
286  don't work on your platform, then special case your platform with new
287  implementations.
288---------------------------------------------------------------------------*/
289
290U_CAPI UDate U_EXPORT2
291uprv_getUTCtime()
292{
293#if defined(U_DEBUG_FAKETIME)
294    return getUTCtime_fake(); /* Hook for overriding the clock */
295#else
296    return uprv_getRawUTCtime();
297#endif
298}
299
300/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
301U_CAPI UDate U_EXPORT2
302uprv_getRawUTCtime()
303{
304#if U_PLATFORM_USES_ONLY_WIN32_API
305
306    FileTimeConversion winTime;
307    GetSystemTimeAsFileTime(&winTime.fileTime);
308    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
309#else
310
311#if HAVE_GETTIMEOFDAY
312    struct timeval posixTime;
313    gettimeofday(&posixTime, NULL);
314    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
315#else
316    time_t epochtime;
317    time(&epochtime);
318    return (UDate)epochtime * U_MILLIS_PER_SECOND;
319#endif
320
321#endif
322}
323
324/*-----------------------------------------------------------------------------
325  IEEE 754
326  These methods detect and return NaN and infinity values for doubles
327  conforming to IEEE 754.  Platforms which support this standard include X86,
328  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
329  If this doesn't work on your platform, you have non-IEEE floating-point, and
330  will need to code your own versions.  A naive implementation is to return 0.0
331  for getNaN and getInfinity, and false for isNaN and isInfinite.
332  ---------------------------------------------------------------------------*/
333
334U_CAPI UBool U_EXPORT2
335uprv_isNaN(double number)
336{
337#if IEEE_754
338    BitPatternConversion convertedNumber;
339    convertedNumber.d64 = number;
340    /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
341    return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
342
343#elif U_PLATFORM == U_PF_OS390
344    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
345                        sizeof(uint32_t));
346    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
347                        sizeof(uint32_t));
348
349    return ((highBits & 0x7F080000L) == 0x7F080000L) &&
350      (lowBits == 0x00000000L);
351
352#else
353    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
354    /* you'll need to replace this default implementation with what's correct*/
355    /* for your platform.*/
356    return number != number;
357#endif
358}
359
360U_CAPI UBool U_EXPORT2
361uprv_isInfinite(double number)
362{
363#if IEEE_754
364    BitPatternConversion convertedNumber;
365    convertedNumber.d64 = number;
366    /* Infinity is exactly 0x7FF0000000000000U. */
367    return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
368#elif U_PLATFORM == U_PF_OS390
369    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
370                        sizeof(uint32_t));
371    uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
372                        sizeof(uint32_t));
373
374    return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
375
376#else
377    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
378    /* value, you'll need to replace this default implementation with what's*/
379    /* correct for your platform.*/
380    return number == (2.0 * number);
381#endif
382}
383
384U_CAPI UBool U_EXPORT2
385uprv_isPositiveInfinity(double number)
386{
387#if IEEE_754 || U_PLATFORM == U_PF_OS390
388    return (UBool)(number > 0 && uprv_isInfinite(number));
389#else
390    return uprv_isInfinite(number);
391#endif
392}
393
394U_CAPI UBool U_EXPORT2
395uprv_isNegativeInfinity(double number)
396{
397#if IEEE_754 || U_PLATFORM == U_PF_OS390
398    return (UBool)(number < 0 && uprv_isInfinite(number));
399
400#else
401    uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
402                        sizeof(uint32_t));
403    return((highBits & SIGN) && uprv_isInfinite(number));
404
405#endif
406}
407
408U_CAPI double U_EXPORT2
409uprv_getNaN()
410{
411#if IEEE_754 || U_PLATFORM == U_PF_OS390
412    return gNan.d64;
413#else
414    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
415    /* you'll need to replace this default implementation with what's correct*/
416    /* for your platform.*/
417    return 0.0;
418#endif
419}
420
421U_CAPI double U_EXPORT2
422uprv_getInfinity()
423{
424#if IEEE_754 || U_PLATFORM == U_PF_OS390
425    return gInf.d64;
426#else
427    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
428    /* value, you'll need to replace this default implementation with what's*/
429    /* correct for your platform.*/
430    return 0.0;
431#endif
432}
433
/* Thin wrapper: forwards to the C library floor(). */
U_CAPI double U_EXPORT2
uprv_floor(double x)
{
    return floor(x);
}

/* Thin wrapper: forwards to the C library ceil(). */
U_CAPI double U_EXPORT2
uprv_ceil(double x)
{
    return ceil(x);
}

/* Rounds by taking the floor of x + 0.5, so values exactly halfway between
   two integers go toward positive infinity (for example -2.5 gives -2.0). */
U_CAPI double U_EXPORT2
uprv_round(double x)
{
    return uprv_floor(x + 0.5);
}

/* Thin wrapper: forwards to the C library fabs(). */
U_CAPI double U_EXPORT2
uprv_fabs(double x)
{
    return fabs(x);
}

/* Thin wrapper: forwards to the C library modf(); y is passed through as
   modf()'s output pointer and modf()'s result is returned. */
U_CAPI double U_EXPORT2
uprv_modf(double x, double* y)
{
    return modf(x, y);
}

/* Thin wrapper: forwards to the C library fmod(). */
U_CAPI double U_EXPORT2
uprv_fmod(double x, double y)
{
    return fmod(x, y);
}

/* Thin wrapper: forwards to the C library pow(). */
U_CAPI double U_EXPORT2
uprv_pow(double x, double y)
{
    /* This is declared as "double pow(double x, double y)" */
    return pow(x, y);
}

/* Returns 10 raised to the power x, computed with pow() after converting the
   integer exponent to double. */
U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)
{
    return pow(10.0, (double)x);
}
482
483U_CAPI double U_EXPORT2
484uprv_fmax(double x, double y)
485{
486#if IEEE_754
487    /* first handle NaN*/
488    if(uprv_isNaN(x) || uprv_isNaN(y))
489        return uprv_getNaN();
490
491    /* check for -0 and 0*/
492    if(x == 0.0 && y == 0.0 && u_signBit(x))
493        return y;
494
495#endif
496
497    /* this should work for all flt point w/o NaN and Inf special cases */
498    return (x > y ? x : y);
499}
500
501U_CAPI double U_EXPORT2
502uprv_fmin(double x, double y)
503{
504#if IEEE_754
505    /* first handle NaN*/
506    if(uprv_isNaN(x) || uprv_isNaN(y))
507        return uprv_getNaN();
508
509    /* check for -0 and 0*/
510    if(x == 0.0 && y == 0.0 && u_signBit(y))
511        return y;
512
513#endif
514
515    /* this should work for all flt point w/o NaN and Inf special cases */
516    return (x > y ? y : x);
517}
518
519/**
520 * Truncates the given double.
521 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
522 * This is different than calling floor() or ceil():
523 * floor(3.3) = 3, floor(-3.3) = -4
524 * ceil(3.3) = 4, ceil(-3.3) = -3
525 */
526U_CAPI double U_EXPORT2
527uprv_trunc(double d)
528{
529#if IEEE_754
530    /* handle error cases*/
531    if(uprv_isNaN(d))
532        return uprv_getNaN();
533    if(uprv_isInfinite(d))
534        return uprv_getInfinity();
535
536    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
537        return ceil(d);
538    else
539        return floor(d);
540
541#else
542    return d >= 0 ? floor(d) : ceil(d);
543
544#endif
545}
546
547/**
548 * Return the largest positive number that can be represented by an integer
549 * type of arbitrary bit length.
550 */
551U_CAPI double U_EXPORT2
552uprv_maxMantissa(void)
553{
554    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
555}
556
/* Thin wrapper: forwards to the C library log(). */
U_CAPI double U_EXPORT2
uprv_log(double d)
{
    return log(d);
}
562
563U_CAPI void * U_EXPORT2
564uprv_maximumPtr(void * base)
565{
566#if U_PLATFORM == U_PF_OS400
567    /*
568     * With the provided function we should never be out of range of a given segment
569     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
570     * id and 3 bytes for the offset.  The key is that the casting takes care of
571     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
572     * seen in a program is x001000 and when casted to an int would be 0.
573     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
574     *
575     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
576     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
577     * This function determines the activation based on the pointer that is passed in and
578     * calculates the appropriate maximum available size for
579     * each pointer type (TERASPACE and non-TERASPACE)
580     *
581     * Unlike other operating systems, the pointer model isn't determined at
582     * compile time on i5/OS.
583     */
584    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
585        /* if it is a TERASPACE pointer the max is 2GB - 4k */
586        return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
587    }
588    /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
589    return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
590
591#else
592    return U_MAX_PTR(base);
593#endif
594}
595
596/*---------------------------------------------------------------------------
597  Platform-specific Implementations
598  Try these, and if they don't work on your platform, then special case your
599  platform with new implementations.
600  ---------------------------------------------------------------------------*/
601
602/* Generic time zone layer -------------------------------------------------- */
603
604/* Time zone utilities */
605U_CAPI void U_EXPORT2
606uprv_tzset()
607{
608#if defined(U_TZSET)
609    U_TZSET();
610#else
611    /* no initialization*/
612#endif
613}
614
615U_CAPI int32_t U_EXPORT2
616uprv_timezone()
617{
618#ifdef U_TIMEZONE
619    return U_TIMEZONE;
620#else
621    time_t t, t1, t2;
622    struct tm tmrec;
623    int32_t tdiff = 0;
624
625    time(&t);
626    uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
627#if U_PLATFORM != U_PF_IPHONE
628    UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
629#endif
630    t1 = mktime(&tmrec);                 /* local time in seconds*/
631    uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
632    t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
633    tdiff = t2 - t1;
634
635#if U_PLATFORM != U_PF_IPHONE
636    /* imitate NT behaviour, which returns same timezone offset to GMT for
637       winter and summer.
638       This does not work on all platforms. For instance, on glibc on Linux
639       and on Mac OS 10.5, tdiff calculated above remains the same
640       regardless of whether DST is in effect or not. iOS is another
641       platform where this does not work. Linux + glibc and Mac OS 10.5
642       have U_TIMEZONE defined so that this code is not reached.
643    */
644    if (dst_checked)
645        tdiff += 3600;
646#endif
647    return tdiff;
648#endif
649}
650
651/* Note that U_TZNAME does *not* have to be tzname, but if it is,
652   some platforms need to have it declared here. */
653
654#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED || (U_PLATFORM == U_PF_CYGWIN && !U_PLATFORM_USES_ONLY_WIN32_API))
655/* RS6000 and others reject char **tzname.  */
656extern U_IMPORT char *U_TZNAME[];
657#endif
658
659#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
660/* These platforms are likely to use Olson timezone IDs. */
661#define CHECK_LOCALTIME_LINK 1
662#if U_PLATFORM_IS_DARWIN_BASED
663#include <tzfile.h>
664#define TZZONEINFO      (TZDIR "/")
665#elif U_PLATFORM == U_PF_SOLARIS
666#define TZDEFAULT       "/etc/localtime"
667#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
668#define TZZONEINFO2     "../usr/share/lib/zoneinfo/"
669#define TZ_ENV_CHECK    "localtime"
670#else
671#define TZDEFAULT       "/etc/localtime"
672#define TZZONEINFO      "/usr/share/zoneinfo/"
673#endif
674#if U_HAVE_DIRENT_H
675#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
676/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
677   symlinked to /etc/localtime, which makes searchForTZFile return
678   'localtime' when it's the first match. */
679#define TZFILE_SKIP2    "localtime"
680#define SEARCH_TZFILE
681#include <dirent.h>  /* Needed to search through system timezone files */
682#endif
683static char gTimeZoneBuffer[PATH_MAX];
684static char *gTimeZoneBufferPtr = NULL;
685#endif
686
687#if !U_PLATFORM_USES_ONLY_WIN32_API
688#define isNonDigit(ch) (ch < '0' || '9' < ch)
689static UBool isValidOlsonID(const char *id) {
690    int32_t idx = 0;
691
692    /* Determine if this is something like Iceland (Olson ID)
693    or AST4ADT (non-Olson ID) */
694    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
695        idx++;
696    }
697
698    /* If we went through the whole string, then it might be okay.
699    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
700    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
701    The rest of the time it could be an Olson ID. George */
702    return (UBool)(id[idx] == 0
703        || uprv_strcmp(id, "PST8PDT") == 0
704        || uprv_strcmp(id, "MST7MDT") == 0
705        || uprv_strcmp(id, "CST6CDT") == 0
706        || uprv_strcmp(id, "EST5EDT") == 0);
707}
708
709/* On some Unix-like OS, 'posix' subdirectory in
710   /usr/share/zoneinfo replicates the top-level contents. 'right'
711   subdirectory has the same set of files, but individual files
712   are different from those in the top-level directory or 'posix'
713   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
714   has files for UTC.
715   When the first match for /etc/localtime is in either of them
716   (usually in posix because 'right' has different file contents),
717   or TZ environment variable points to one of them, createTimeZone
718   fails because, say, 'posix/America/New_York' is not an Olson
719   timezone id ('America/New_York' is). So, we have to skip
720   'posix/' and 'right/' at the beginning. */
721static void skipZoneIDPrefix(const char** id) {
722    if (uprv_strncmp(*id, "posix/", 6) == 0
723        || uprv_strncmp(*id, "right/", 6) == 0)
724    {
725        *id += 6;
726    }
727}
728#endif
729
730#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
731
/* Converts an offset in hours to seconds, truncated to int32_t. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the abbreviation-to-Olson-ID lookup table.  A row matches when
   the offset, the daylight type and both abbreviations all agree. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* offset in seconds; in the table, zones west of GMT are positive (e.g. US/Pacific is 28800) */
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID;      /* abbreviation used for standard time */
    const char *dstID;      /* abbreviation used for daylight time */
    const char *olsonID;    /* Olson ID the row maps to */
} OffsetZoneMapping;

/* Values for OffsetZoneMapping.daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
742
/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.

Columns: offset in seconds, daylight type (0, 1 or 2 -- see the
U_DAYLIGHT_* enum), standard abbreviation, daylight abbreviation, Olson ID.
remapShortTimeZone() scans the rows in order and returns the first match, so
for rows that share all four key columns only the earliest one is reachable.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
813
814/*#define DEBUG_TZNAME*/
815
816static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
817{
818    int32_t idx;
819#ifdef DEBUG_TZNAME
820    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
821#endif
822    for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
823    {
824        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
825            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
826            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
827            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
828        {
829            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
830        }
831    }
832    return NULL;
833}
834#endif
835
836#ifdef SEARCH_TZFILE
/* Size in bytes of the chunks in which compareBinaryFiles() reads a candidate file. */
#define MAX_READ_SIZE 512

/* State about the default time zone file that compareBinaryFiles() fills in
   on first use and reuses on later calls. */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default file; NULL until first read */
    int64_t defaultTZFileSize;  /* size of the default file in bytes; 0 until measured */
    FILE* defaultTZFilePtr;     /* stream on the default file; NULL until opened */
    UBool defaultTZstatus;      /* not used by the code visible in this part of the file */
    int32_t defaultTZPosition;  /* compare offset into defaultTZBuffer; reset on every comparison */
} DefaultTZInfo;
846
847/*
848 * This method compares the two files given to see if they are a match.
849 * It is currently use to compare two TZ files.
850 */
851static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
852    FILE* file;
853    int64_t sizeFile;
854    int64_t sizeFileLeft;
855    int32_t sizeFileRead;
856    int32_t sizeFileToRead;
857    char bufferFile[MAX_READ_SIZE];
858    UBool result = TRUE;
859
860    if (tzInfo->defaultTZFilePtr == NULL) {
861        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
862    }
863    file = fopen(TZFileName, "r");
864
865    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
866
867    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
868        /* First check that the file size are equal. */
869        if (tzInfo->defaultTZFileSize == 0) {
870            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
871            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
872        }
873        fseek(file, 0, SEEK_END);
874        sizeFile = ftell(file);
875        sizeFileLeft = sizeFile;
876
877        if (sizeFile != tzInfo->defaultTZFileSize) {
878            result = FALSE;
879        } else {
880            /* Store the data from the files in seperate buffers and
881             * compare each byte to determine equality.
882             */
883            if (tzInfo->defaultTZBuffer == NULL) {
884                rewind(tzInfo->defaultTZFilePtr);
885                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
886                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
887            }
888            rewind(file);
889            while(sizeFileLeft > 0) {
890                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
891                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
892
893                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
894                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
895                    result = FALSE;
896                    break;
897                }
898                sizeFileLeft -= sizeFileRead;
899                tzInfo->defaultTZPosition += sizeFileRead;
900            }
901        }
902    } else {
903        result = FALSE;
904    }
905
906    if (file != NULL) {
907        fclose(file);
908    }
909
910    return result;
911}
912
913
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
#define SKIP1 "."
#define SKIP2 ".."
/* Forward declaration: the cleanup callback is defined further down in this file. */
static UBool U_CALLCONV putil_cleanup(void);
/* Holds the zone ID most recently found by searchForTZFile(); deleted by putil_cleanup(). */
static CharString *gSearchTZFileResult = NULL;
919
920/*
921 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
922 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
923 */
924static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
925    DIR* dirp = opendir(path);
926    DIR* subDirp = NULL;
927    struct dirent* dirEntry = NULL;
928
929    char* result = NULL;
930    if (dirp == NULL) {
931        return result;
932    }
933
934    if (gSearchTZFileResult == NULL) {
935        gSearchTZFileResult = new CharString;
936        if (gSearchTZFileResult == NULL) {
937            return NULL;
938        }
939        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
940    }
941
942    /* Save the current path */
943    UErrorCode status = U_ZERO_ERROR;
944    CharString curpath(path, -1, status);
945    if (U_FAILURE(status)) {
946        return NULL;
947    }
948
949    /* Check each entry in the directory. */
950    while((dirEntry = readdir(dirp)) != NULL) {
951        const char* dirName = dirEntry->d_name;
952        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0) {
953            /* Create a newpath with the new entry to test each entry in the directory. */
954            CharString newpath(curpath, status);
955            newpath.append(dirName, -1, status);
956            if (U_FAILURE(status)) {
957                return NULL;
958            }
959
960            if ((subDirp = opendir(newpath.data())) != NULL) {
961                /* If this new path is a directory, make a recursive call with the newpath. */
962                closedir(subDirp);
963                newpath.append('/', status);
964                if (U_FAILURE(status)) {
965                    return NULL;
966                }
967                result = searchForTZFile(newpath.data(), tzInfo);
968                /*
969                 Have to get out here. Otherwise, we'd keep looking
970                 and return the first match in the top-level directory
971                 if there's a match in the top-level. If not, this function
972                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
973                 It worked without this in most cases because we have a fallback of calling
974                 localtime_r to figure out the default timezone.
975                */
976                if (result != NULL)
977                    break;
978            } else if (uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
979                if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
980                    int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
981                    if (amountToSkip > newpath.length()) {
982                        amountToSkip = newpath.length();
983                    }
984                    const char* zoneid = newpath.data() + amountToSkip;
985                    skipZoneIDPrefix(&zoneid);
986                    gSearchTZFileResult->clear();
987                    gSearchTZFileResult->append(zoneid, -1, status);
988                    if (U_FAILURE(status)) {
989                        return NULL;
990                    }
991                    result = gSearchTZFileResult->data();
992                    /* Get out after the first one found. */
993                    break;
994                }
995            }
996        }
997    }
998    closedir(dirp);
999    return result;
1000}
1001#endif
1002
1003U_CAPI void U_EXPORT2
1004uprv_tzname_clear_cache()
1005{
1006#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1007    gTimeZoneBufferPtr = NULL;
1008#endif
1009}
1010
/*
 * Determine an ID for the host's default time zone.
 *
 * The sources are tried in this order, and the first usable one wins:
 *   1. Windows-only builds: uprv_detectWindowsTimeZone().
 *   2. The TZ environment variable, if it looks like a valid Olson ID.
 *   3. With CHECK_LOCALTIME_LINK: the target of the TZDEFAULT symlink, or (with
 *      SEARCH_TZFILE) a search of TZZONEINFO for a file matching TZDEFAULT.
 *      The result of this step is cached in gTimeZoneBufferPtr.
 *   4. With U_TZNAME: the platform abbreviation remapped to an Olson ID, falling
 *      back to U_TZNAME[n] itself.
 *   5. Otherwise the empty string.
 *
 * n  index into U_TZNAME used by the final fallback.
 *
 * Callers must handle threading themselves; the cache is read and written unguarded.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }
#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
    tzid = getenv("TZ");
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        /* readlink() does not NUL-terminate, hence the size-1 limit and the explicit terminator below. */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoLen = uprv_strlen(TZZONEINFO);
            gTimeZoneBuffer[ret] = 0;
            /* Accept the link target only if it lives under TZZONEINFO; the ID is what follows that prefix. */
            if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO, tzZoneInfoLen) == 0
                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
            {
                return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
            }
#if U_PLATFORM == U_PF_SOLARIS
            else
            {
                /* Solaris: retry with the alternate zoneinfo prefix. */
                tzZoneInfoLen = uprv_strlen(TZZONEINFO2);
                if (uprv_strncmp(gTimeZoneBuffer, TZZONEINFO2, tzZoneInfoLen) == 0
                                && isValidOlsonID(gTimeZoneBuffer + tzZoneInfoLen))
                {
                    return (gTimeZoneBufferPtr = gTimeZoneBuffer + tzZoneInfoLen);
                }
            }
#endif
        } else {
#if defined(SEARCH_TZFILE)
            /* TZDEFAULT is not a readable symlink: look for a zoneinfo file with identical contents. */
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            /* NOTE(review): a non-NULL but invalid search result stays cached in gTimeZoneBufferPtr
             * and is returned unvalidated by later calls through the else-branch below - confirm
             * whether that is intended. */
            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already resolved and cached the ID. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs.  */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* DST in effect in December is checked first, then June; otherwise no DST. */
        if(decemberSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_JUNE;
        } else {
          daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No remapping available: hand back the raw platform abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1155
/* Get and set the ICU data directory --------------------------------------- */

/* Guards the one-time run of dataDirectoryInitFn() from u_getDataDirectory(). */
static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
/* Current data directory: NULL until set, otherwise either the "" literal or a
 * uprv_malloc'ed copy (see u_setDataDirectory()). Only the non-empty form is freed. */
static char *gDataDirectory = NULL;

/* Guards the one-time run of TimeZoneDataDirInitFn(). */
UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
/* Time zone files directory; allocated in TimeZoneDataDirInitFn(), deleted in putil_cleanup(). */
static CharString *gTimeZoneFilesDirectory = NULL;

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
 /* Cached result of uprv_getDefaultLocaleID(); freed in putil_cleanup(). */
 static char *gCorrectedPOSIXLocale = NULL; /* Heap allocated */
#endif
1167
1168static UBool U_CALLCONV putil_cleanup(void)
1169{
1170    if (gDataDirectory && *gDataDirectory) {
1171        uprv_free(gDataDirectory);
1172    }
1173    gDataDirectory = NULL;
1174    gDataDirInitOnce.reset();
1175
1176    delete gTimeZoneFilesDirectory;
1177    gTimeZoneFilesDirectory = NULL;
1178    gTimeZoneFilesInitOnce.reset();
1179
1180#ifdef SEARCH_TZFILE
1181    delete gSearchTZFileResult;
1182    gSearchTZFileResult = NULL;
1183#endif
1184
1185#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1186    if (gCorrectedPOSIXLocale) {
1187        uprv_free(gCorrectedPOSIXLocale);
1188        gCorrectedPOSIXLocale = NULL;
1189    }
1190#endif
1191    return TRUE;
1192}
1193
1194/*
1195 * Set the data directory.
1196 *    Make a copy of the passed string, and set the global data dir to point to it.
1197 */
1198U_CAPI void U_EXPORT2
1199u_setDataDirectory(const char *directory) {
1200    char *newDataDir;
1201    int32_t length;
1202
1203    if(directory==NULL || *directory==0) {
1204        /* A small optimization to prevent the malloc and copy when the
1205        shared library is used, and this is a way to make sure that NULL
1206        is never returned.
1207        */
1208        newDataDir = (char *)"";
1209    }
1210    else {
1211        length=(int32_t)uprv_strlen(directory);
1212        newDataDir = (char *)uprv_malloc(length + 2);
1213        /* Exit out if newDataDir could not be created. */
1214        if (newDataDir == NULL) {
1215            return;
1216        }
1217        uprv_strcpy(newDataDir, directory);
1218
1219#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1220        {
1221            char *p;
1222            while(p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) {
1223                *p = U_FILE_SEP_CHAR;
1224            }
1225        }
1226#endif
1227    }
1228
1229    if (gDataDirectory && *gDataDirectory) {
1230        uprv_free(gDataDirectory);
1231    }
1232    gDataDirectory = newDataDir;
1233    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1234}
1235
1236U_CAPI UBool U_EXPORT2
1237uprv_pathIsAbsolute(const char *path)
1238{
1239  if(!path || !*path) {
1240    return FALSE;
1241  }
1242
1243  if(*path == U_FILE_SEP_CHAR) {
1244    return TRUE;
1245  }
1246
1247#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1248  if(*path == U_FILE_ALT_SEP_CHAR) {
1249    return TRUE;
1250  }
1251#endif
1252
1253#if U_PLATFORM_USES_ONLY_WIN32_API
1254  if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1255       ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1256      path[1] == ':' ) {
1257    return TRUE;
1258  }
1259#endif
1260
1261  return FALSE;
1262}
1263
/* Temporary backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
   until some client wrapper makefiles are updated.
   For iPhone simulator builds that did not configure a prefix variable, default to
   IPHONE_SIMULATOR_ROOT; dataDirectoryInitFn() prepends that variable's value to the
   compiled-in data directory. */
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_IPHONE_SIMULATOR
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
# endif
#endif
1271
1272static void U_CALLCONV dataDirectoryInitFn() {
1273    /* If we already have the directory, then return immediately. Will happen if user called
1274     * u_setDataDirectory().
1275     */
1276    if (gDataDirectory) {
1277        return;
1278    }
1279
1280    const char *path = NULL;
1281#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1282    char datadir_path_buffer[PATH_MAX];
1283#endif
1284
1285    /*
1286    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1287    override ICU's data with the ICU_DATA environment variable. This prevents
1288    problems where multiple custom copies of ICU's specific version of data
1289    are installed on a system. Either the application must define the data
1290    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1291    ICU, set the data with udata_setCommonData or trust that all of the
1292    required data is contained in ICU's data library that contains
1293    the entry point defined by U_ICUDATA_ENTRY_POINT.
1294
1295    There may also be some platforms where environment variables
1296    are not allowed.
1297    */
1298#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1299    /* First try to get the environment variable */
1300    path=getenv("ICU_DATA");
1301#   endif
1302
1303    /* ICU_DATA_DIR may be set as a compile option.
1304     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1305     * and is used only when data is built in archive mode eliminating the need
1306     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1307     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1308     * set their own path.
1309     */
1310#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1311    if(path==NULL || *path==0) {
1312# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1313        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1314# endif
1315# ifdef ICU_DATA_DIR
1316        path=ICU_DATA_DIR;
1317# else
1318        path=U_ICU_DATA_DEFAULT_DIR;
1319# endif
1320# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1321        if (prefix != NULL) {
1322            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1323            path=datadir_path_buffer;
1324        }
1325# endif
1326    }
1327#endif
1328
1329    if(path==NULL) {
1330        /* It looks really bad, set it to something. */
1331        path = "";
1332    }
1333
1334    u_setDataDirectory(path);
1335    return;
1336}
1337
1338U_CAPI const char * U_EXPORT2
1339u_getDataDirectory(void) {
1340    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1341    return gDataDirectory;
1342}
1343
1344static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1345    if (U_FAILURE(status)) {
1346        return;
1347    }
1348    gTimeZoneFilesDirectory->clear();
1349    gTimeZoneFilesDirectory->append(path, status);
1350#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1351    char *p = gTimeZoneFilesDirectory->data();
1352    while (p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) {
1353        *p = U_FILE_SEP_CHAR;
1354    }
1355#endif
1356}
1357
/* Stringify the expansion of a macro argument. The extra level of indirection
 * makes the preprocessor expand x before the # operator is applied to it. */
#define TO_STRING(x) TO_STRING_2(x)
#define TO_STRING_2(x) #x
1360
1361static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1362    U_ASSERT(gTimeZoneFilesDirectory == NULL);
1363    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1364    gTimeZoneFilesDirectory = new CharString();
1365    if (gTimeZoneFilesDirectory == NULL) {
1366        status = U_MEMORY_ALLOCATION_ERROR;
1367        return;
1368    }
1369    const char *dir = getenv("ICU_TIMEZONE_FILES_DIR");
1370#if defined(U_TIMEZONE_FILES_DIR)
1371    if (dir == NULL) {
1372        dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1373    }
1374#endif
1375    if (dir == NULL) {
1376        dir = "";
1377    }
1378    setTimeZoneFilesDir(dir, status);
1379}
1380
1381
1382U_CAPI const char * U_EXPORT2
1383u_getTimeZoneFilesDirectory(UErrorCode *status) {
1384    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1385    return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1386}
1387
1388U_CAPI void U_EXPORT2
1389u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1390    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1391    setTimeZoneFilesDir(path, *status);
1392
1393    // Note: this function does some extra churn, first setting based on the
1394    //       environment, then immediately replacing with the value passed in.
1395    //       The logic is simpler that way, and performance shouldn't be an issue.
1396}
1397
1398
1399#if U_POSIX_LOCALE
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * The ID is taken from setlocale(category, NULL). If that yields nothing, "C"
 * or "POSIX", the environment variables LC_ALL, then LC_MESSAGES/LC_CTYPE, then
 * LANG are consulted. If the outcome is still missing, "C" or "POSIX", the
 * function returns "en_US_POSIX". The result is never NULL; it may point into
 * storage owned by setlocale()/getenv().
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable */
            posixID = getenv("LC_ALL");
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
             * This is needed to properly handle empty env. variables
             */
            /* The two preprocessor branches below open the same pair of nested ifs;
             * the Solaris variant additionally treats an empty value as unset. */
#if U_PLATFORM == U_PF_SOLARIS
            if ((posixID == 0) || (posixID[0] == '\0')) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if ((posixID == 0) || (posixID[0] == '\0')) {
#else
            if (posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (posixID == 0) {
#endif
                    posixID = getenv("LANG");
                }
            }
        }
    }
    /* Also reached for unsupported categories, where posixID is still NULL. */
    if ((posixID==0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
    }
    return posixID;
}
1459
1460/* Return just the POSIX id for the default locale, whatever happens to be in
1461 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1462 */
1463static const char *uprv_getPOSIXIDForDefaultLocale(void)
1464{
1465    static const char* posixID = NULL;
1466    if (posixID == 0) {
1467        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1468    }
1469    return posixID;
1470}
1471
1472#if !U_CHARSET_IS_UTF8
1473/* Return just the POSIX id for the default codepage, whatever happens to be in
1474 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1475 */
1476static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1477{
1478    static const char* posixID = NULL;
1479    if (posixID == 0) {
1480        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1481    }
1482    return posixID;
1483}
1484#endif
1485#endif
1486
1487/* NOTE: The caller should handle thread safety */
1488U_CAPI const char* U_EXPORT2
1489uprv_getDefaultLocaleID()
1490{
1491#if U_POSIX_LOCALE
1492/*
1493  Note that:  (a '!' means the ID is improper somehow)
1494     LC_ALL  ---->     default_loc          codepage
1495--------------------------------------------------------
1496     ab.CD             ab                   CD
1497     ab@CD             ab__CD               -
1498     ab@CD.EF          ab__CD               EF
1499
1500     ab_CD.EF@GH       ab_CD_GH             EF
1501
1502Some 'improper' ways to do the same as above:
1503  !  ab_CD@GH.EF       ab_CD_GH             EF
1504  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1505  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1506
1507     _CD@GH            _CD_GH               -
1508     _CD.EF@GH         _CD_GH               EF
1509
1510The variant cannot have dots in it.
1511The 'rightmost' variant (@xxx) wins.
1512The leftmost codepage (.xxx) wins.
1513*/
1514    char *correctedPOSIXLocale = 0;
1515    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1516    const char *p;
1517    const char *q;
1518    int32_t len;
1519
1520    /* Format: (no spaces)
1521    ll [ _CC ] [ . MM ] [ @ VV]
1522
1523      l = lang, C = ctry, M = charmap, V = variant
1524    */
1525
1526    if (gCorrectedPOSIXLocale != NULL) {
1527        return gCorrectedPOSIXLocale;
1528    }
1529
1530    if ((p = uprv_strchr(posixID, '.')) != NULL) {
1531        /* assume new locale can't be larger than old one? */
1532        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1533        /* Exit on memory allocation error. */
1534        if (correctedPOSIXLocale == NULL) {
1535            return NULL;
1536        }
1537        uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1538        correctedPOSIXLocale[p-posixID] = 0;
1539
1540        /* do not copy after the @ */
1541        if ((p = uprv_strchr(correctedPOSIXLocale, '@')) != NULL) {
1542            correctedPOSIXLocale[p-correctedPOSIXLocale] = 0;
1543        }
1544    }
1545
1546    /* Note that we scan the *uncorrected* ID. */
1547    if ((p = uprv_strrchr(posixID, '@')) != NULL) {
1548        if (correctedPOSIXLocale == NULL) {
1549            correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID)+1));
1550            /* Exit on memory allocation error. */
1551            if (correctedPOSIXLocale == NULL) {
1552                return NULL;
1553            }
1554            uprv_strncpy(correctedPOSIXLocale, posixID, p-posixID);
1555            correctedPOSIXLocale[p-posixID] = 0;
1556        }
1557        p++;
1558
1559        /* Take care of any special cases here.. */
1560        if (!uprv_strcmp(p, "nynorsk")) {
1561            p = "NY";
1562            /* Don't worry about no__NY. In practice, it won't appear. */
1563        }
1564
1565        if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) {
1566            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */
1567        }
1568        else {
1569            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1570        }
1571
1572        if ((q = uprv_strchr(p, '.')) != NULL) {
1573            /* How big will the resulting string be? */
1574            len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1575            uprv_strncat(correctedPOSIXLocale, p, q-p);
1576            correctedPOSIXLocale[len] = 0;
1577        }
1578        else {
1579            /* Anything following the @ sign */
1580            uprv_strcat(correctedPOSIXLocale, p);
1581        }
1582
1583        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1584         * How about 'russian' -> 'ru'?
1585         * Many of the other locales using ISO codes will be handled by the
1586         * canonicalization functions in uloc_getDefault.
1587         */
1588    }
1589
1590    /* Was a correction made? */
1591    if (correctedPOSIXLocale != NULL) {
1592        posixID = correctedPOSIXLocale;
1593    }
1594    else {
1595        /* copy it, just in case the original pointer goes away.  See j2395 */
1596        correctedPOSIXLocale = (char *)uprv_malloc(uprv_strlen(posixID) + 1);
1597        /* Exit on memory allocation error. */
1598        if (correctedPOSIXLocale == NULL) {
1599            return NULL;
1600        }
1601        posixID = uprv_strcpy(correctedPOSIXLocale, posixID);
1602    }
1603
1604    if (gCorrectedPOSIXLocale == NULL) {
1605        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1606        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1607        correctedPOSIXLocale = NULL;
1608    }
1609
1610    if (correctedPOSIXLocale != NULL) {  /* Was already set - clean up. */
1611        uprv_free(correctedPOSIXLocale);
1612    }
1613
1614    return posixID;
1615
1616#elif U_PLATFORM_USES_ONLY_WIN32_API
1617#define POSIX_LOCALE_CAPACITY 64
1618    UErrorCode status = U_ZERO_ERROR;
1619    char *correctedPOSIXLocale = 0;
1620
1621    if (gCorrectedPOSIXLocale != NULL) {
1622        return gCorrectedPOSIXLocale;
1623    }
1624
1625    LCID id = GetThreadLocale();
1626    correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1627    if (correctedPOSIXLocale) {
1628        int32_t posixLen = uprv_convertToPosix(id, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1629        if (U_SUCCESS(status)) {
1630            *(correctedPOSIXLocale + posixLen) = 0;
1631            gCorrectedPOSIXLocale = correctedPOSIXLocale;
1632            ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1633        } else {
1634            uprv_free(correctedPOSIXLocale);
1635        }
1636    }
1637
1638    if (gCorrectedPOSIXLocale == NULL) {
1639        return "en_US";
1640    }
1641    return gCorrectedPOSIXLocale;
1642
1643#elif U_PLATFORM == U_PF_OS400
1644    /* locales are process scoped and are by definition thread safe */
1645    static char correctedLocale[64];
1646    const  char *localeID = getenv("LC_ALL");
1647           char *p;
1648
1649    if (localeID == NULL)
1650        localeID = getenv("LANG");
1651    if (localeID == NULL)
1652        localeID = setlocale(LC_ALL, NULL);
1653    /* Make sure we have something... */
1654    if (localeID == NULL)
1655        return "en_US_POSIX";
1656
1657    /* Extract the locale name from the path. */
1658    if((p = uprv_strrchr(localeID, '/')) != NULL)
1659    {
1660        /* Increment p to start of locale name. */
1661        p++;
1662        localeID = p;
1663    }
1664
1665    /* Copy to work location. */
1666    uprv_strcpy(correctedLocale, localeID);
1667
1668    /* Strip off the '.locale' extension. */
1669    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1670        *p = 0;
1671    }
1672
1673    /* Upper case the locale name. */
1674    T_CString_toUpperCase(correctedLocale);
1675
1676    /* See if we are using the POSIX locale.  Any of the
1677    * following are equivalent and use the same QLGPGCMA
1678    * (POSIX) locale.
1679    * QLGPGCMA2 means UCS2
1680    * QLGPGCMA_4 means UTF-32
1681    * QLGPGCMA_8 means UTF-8
1682    */
1683    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1684        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1685        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1686    {
1687        uprv_strcpy(correctedLocale, "en_US_POSIX");
1688    }
1689    else
1690    {
1691        int16_t LocaleLen;
1692
1693        /* Lower case the lang portion. */
1694        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1695        {
1696            *p = uprv_tolower(*p);
1697        }
1698
1699        /* Adjust for Euro.  After '_E' add 'URO'. */
1700        LocaleLen = uprv_strlen(correctedLocale);
1701        if (correctedLocale[LocaleLen - 2] == '_' &&
1702            correctedLocale[LocaleLen - 1] == 'E')
1703        {
1704            uprv_strcat(correctedLocale, "URO");
1705        }
1706
1707        /* If using Lotus-based locale then convert to
1708         * equivalent non Lotus.
1709         */
1710        else if (correctedLocale[LocaleLen - 2] == '_' &&
1711            correctedLocale[LocaleLen - 1] == 'L')
1712        {
1713            correctedLocale[LocaleLen - 2] = 0;
1714        }
1715
1716        /* There are separate simplified and traditional
1717         * locales called zh_HK_S and zh_HK_T.
1718         */
1719        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1720        {
1721            uprv_strcpy(correctedLocale, "zh_HK");
1722        }
1723
1724        /* A special zh_CN_GBK locale...
1725        */
1726        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1727        {
1728            uprv_strcpy(correctedLocale, "zh_CN");
1729        }
1730
1731    }
1732
1733    return correctedLocale;
1734#endif
1735
1736}
1737
1738#if !U_CHARSET_IS_UTF8
1739#if U_POSIX_LOCALE
1740/*
1741Due to various platform differences, one platform may specify a charset,
1742when they really mean a different charset. Remap the names so that they are
1743compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1744here. Before adding anything to this function, please consider adding unique
1745names to the ICU alias table in the data directory.
1746*/
1747static const char*
1748remapPlatformDependentCodepage(const char *locale, const char *name) {
1749    if (locale != NULL && *locale == 0) {
1750        /* Make sure that an empty locale is handled the same way. */
1751        locale = NULL;
1752    }
1753    if (name == NULL) {
1754        return NULL;
1755    }
1756#if U_PLATFORM == U_PF_AIX
1757    if (uprv_strcmp(name, "IBM-943") == 0) {
1758        /* Use the ASCII compatible ibm-943 */
1759        name = "Shift-JIS";
1760    }
1761    else if (uprv_strcmp(name, "IBM-1252") == 0) {
1762        /* Use the windows-1252 that contains the Euro */
1763        name = "IBM-5348";
1764    }
1765#elif U_PLATFORM == U_PF_SOLARIS
1766    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1767        /* Solaris underspecifies the "EUC" name. */
1768        if (uprv_strcmp(locale, "zh_CN") == 0) {
1769            name = "EUC-CN";
1770        }
1771        else if (uprv_strcmp(locale, "zh_TW") == 0) {
1772            name = "EUC-TW";
1773        }
1774        else if (uprv_strcmp(locale, "ko_KR") == 0) {
1775            name = "EUC-KR";
1776        }
1777    }
1778    else if (uprv_strcmp(name, "eucJP") == 0) {
1779        /*
1780        ibm-954 is the best match.
1781        ibm-33722 is the default for eucJP (similar to Windows).
1782        */
1783        name = "eucjis";
1784    }
1785    else if (uprv_strcmp(name, "646") == 0) {
1786        /*
1787         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1788         * ISO-8859-1 instead of US-ASCII(646).
1789         */
1790        name = "ISO-8859-1";
1791    }
1792#elif U_PLATFORM_IS_DARWIN_BASED
1793    if (locale == NULL && *name == 0) {
1794        /*
1795        No locale was specified, and an empty name was passed in.
1796        This usually indicates that nl_langinfo didn't return valid information.
1797        Mac OS X uses UTF-8 by default (especially the locale data and console).
1798        */
1799        name = "UTF-8";
1800    }
1801    else if (uprv_strcmp(name, "CP949") == 0) {
1802        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1803        name = "EUC-KR";
1804    }
1805    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1806        /*
1807         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1808         */
1809        name = "UTF-8";
1810    }
1811#elif U_PLATFORM == U_PF_BSD
1812    if (uprv_strcmp(name, "CP949") == 0) {
1813        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1814        name = "EUC-KR";
1815    }
1816#elif U_PLATFORM == U_PF_HPUX
1817    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1818        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1819        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1820        name = "hkbig5";
1821    }
1822    else if (uprv_strcmp(name, "eucJP") == 0) {
1823        /*
1824        ibm-1350 is the best match, but unavailable.
1825        ibm-954 is mostly a superset of ibm-1350.
1826        ibm-33722 is the default for eucJP (similar to Windows).
1827        */
1828        name = "eucjis";
1829    }
1830#elif U_PLATFORM == U_PF_LINUX
1831    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1832        /* Linux underspecifies the "EUC" name. */
1833        if (uprv_strcmp(locale, "korean") == 0) {
1834            name = "EUC-KR";
1835        }
1836        else if (uprv_strcmp(locale, "japanese") == 0) {
1837            /* See comment below about eucJP */
1838            name = "eucjis";
1839        }
1840    }
1841    else if (uprv_strcmp(name, "eucjp") == 0) {
1842        /*
1843        ibm-1350 is the best match, but unavailable.
1844        ibm-954 is mostly a superset of ibm-1350.
1845        ibm-33722 is the default for eucJP (similar to Windows).
1846        */
1847        name = "eucjis";
1848    }
1849    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1850            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1851        /*
1852         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1853         */
1854        name = "UTF-8";
1855    }
1856    /*
1857     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
1858     * it by falling back to 'US-ASCII' when NULL is returned from this
1859     * function. So, we don't have to worry about it here.
1860     */
1861#endif
1862    /* return NULL when "" is passed in */
1863    if (*name == 0) {
1864        name = NULL;
1865    }
1866    return name;
1867}
1868
1869static const char*
1870getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
1871{
1872    char localeBuf[100];
1873    const char *name = NULL;
1874    char *variant = NULL;
1875
1876    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
1877        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
1878        uprv_strncpy(localeBuf, localeName, localeCapacity);
1879        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
1880        name = uprv_strncpy(buffer, name+1, buffCapacity);
1881        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
1882        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
1883            *variant = 0;
1884        }
1885        name = remapPlatformDependentCodepage(localeBuf, name);
1886    }
1887    return name;
1888}
1889#endif
1890
1891static const char*
1892int_getDefaultCodepage()
1893{
1894#if U_PLATFORM == U_PF_OS400
1895    uint32_t ccsid = 37; /* Default to ibm-37 */
1896    static char codepage[64];
1897    Qwc_JOBI0400_t jobinfo;
1898    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
1899
1900    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
1901        "*                         ", "                ", &error);
1902
1903    if (error.Bytes_Available == 0) {
1904        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
1905            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
1906        }
1907        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
1908            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
1909        }
1910        /* else use the default */
1911    }
1912    sprintf(codepage,"ibm-%d", ccsid);
1913    return codepage;
1914
1915#elif U_PLATFORM == U_PF_OS390
1916    static char codepage[64];
1917
1918    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
1919    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
1920    codepage[63] = 0; /* NULL terminate */
1921
1922    return codepage;
1923
1924#elif U_PLATFORM_USES_ONLY_WIN32_API
1925    static char codepage[64];
1926    sprintf(codepage, "windows-%d", GetACP());
1927    return codepage;
1928
1929#elif U_POSIX_LOCALE
1930    static char codesetName[100];
1931    const char *localeName = NULL;
1932    const char *name = NULL;
1933
1934    localeName = uprv_getPOSIXIDForDefaultCodepage();
1935    uprv_memset(codesetName, 0, sizeof(codesetName));
1936    /* On Solaris nl_langinfo returns C locale values unless setlocale
1937     * was called earlier.
1938     */
1939#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
1940    /* When available, check nl_langinfo first because it usually gives more
1941       useful names. It depends on LC_CTYPE.
1942       nl_langinfo may use the same buffer as setlocale. */
1943    {
1944        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
1945#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
1946        /*
1947         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
1948         * instead of ASCII.
1949         */
1950        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
1951            codeset = remapPlatformDependentCodepage(localeName, codeset);
1952        } else
1953#endif
1954        {
1955            codeset = remapPlatformDependentCodepage(NULL, codeset);
1956        }
1957
1958        if (codeset != NULL) {
1959            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
1960            codesetName[sizeof(codesetName)-1] = 0;
1961            return codesetName;
1962        }
1963    }
1964#endif
1965
1966    /* Use setlocale in a nice way, and then check some environment variables.
1967       Maybe the application used setlocale already.
1968    */
1969    uprv_memset(codesetName, 0, sizeof(codesetName));
1970    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
1971    if (name) {
1972        /* if we can find the codeset name from setlocale, return that. */
1973        return name;
1974    }
1975
1976    if (*codesetName == 0)
1977    {
1978        /* Everything failed. Return US ASCII (ISO 646). */
1979        (void)uprv_strcpy(codesetName, "US-ASCII");
1980    }
1981    return codesetName;
1982#else
1983    return "US-ASCII";
1984#endif
1985}
1986
1987
1988U_CAPI const char*  U_EXPORT2
1989uprv_getDefaultCodepage()
1990{
1991    static char const  *name = NULL;
1992    umtx_lock(NULL);
1993    if (name == NULL) {
1994        name = int_getDefaultCodepage();
1995    }
1996    umtx_unlock(NULL);
1997    return name;
1998}
1999#endif  /* !U_CHARSET_IS_UTF8 */
2000
2001
2002/* end of platform-specific implementation -------------- */
2003
2004/* version handling --------------------------------------------------------- */
2005
2006U_CAPI void U_EXPORT2
2007u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2008    char *end;
2009    uint16_t part=0;
2010
2011    if(versionArray==NULL) {
2012        return;
2013    }
2014
2015    if(versionString!=NULL) {
2016        for(;;) {
2017            versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2018            if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2019                break;
2020            }
2021            versionString=end+1;
2022        }
2023    }
2024
2025    while(part<U_MAX_VERSION_LENGTH) {
2026        versionArray[part++]=0;
2027    }
2028}
2029
2030U_CAPI void U_EXPORT2
2031u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2032    if(versionArray!=NULL && versionString!=NULL) {
2033        char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2034        int32_t len = u_strlen(versionString);
2035        if(len>U_MAX_VERSION_STRING_LENGTH) {
2036            len = U_MAX_VERSION_STRING_LENGTH;
2037        }
2038        u_UCharsToChars(versionString, versionChars, len);
2039        versionChars[len]=0;
2040        u_versionFromString(versionArray, versionChars);
2041    }
2042}
2043
2044U_CAPI void U_EXPORT2
2045u_versionToString(const UVersionInfo versionArray, char *versionString) {
2046    uint16_t count, part;
2047    uint8_t field;
2048
2049    if(versionString==NULL) {
2050        return;
2051    }
2052
2053    if(versionArray==NULL) {
2054        versionString[0]=0;
2055        return;
2056    }
2057
2058    /* count how many fields need to be written */
2059    for(count=4; count>0 && versionArray[count-1]==0; --count) {
2060    }
2061
2062    if(count <= 1) {
2063        count = 2;
2064    }
2065
2066    /* write the first part */
2067    /* write the decimal field value */
2068    field=versionArray[0];
2069    if(field>=100) {
2070        *versionString++=(char)('0'+field/100);
2071        field%=100;
2072    }
2073    if(field>=10) {
2074        *versionString++=(char)('0'+field/10);
2075        field%=10;
2076    }
2077    *versionString++=(char)('0'+field);
2078
2079    /* write the following parts */
2080    for(part=1; part<count; ++part) {
2081        /* write a dot first */
2082        *versionString++=U_VERSION_DELIMITER;
2083
2084        /* write the decimal field value */
2085        field=versionArray[part];
2086        if(field>=100) {
2087            *versionString++=(char)('0'+field/100);
2088            field%=100;
2089        }
2090        if(field>=10) {
2091            *versionString++=(char)('0'+field/10);
2092            field%=10;
2093        }
2094        *versionString++=(char)('0'+field);
2095    }
2096
2097    /* NUL-terminate */
2098    *versionString=0;
2099}
2100
2101U_CAPI void U_EXPORT2
2102u_getVersion(UVersionInfo versionArray) {
2103    (void)copyright;   // Suppress unused variable warning from clang.
2104    u_versionFromString(versionArray, U_ICU_VERSION);
2105}
2106
2107/**
2108 * icucfg.h dependent code
2109 */
2110
2111#if U_ENABLE_DYLOAD
2112
2113#if HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2114
2115#if HAVE_DLFCN_H
2116
2117#ifdef __MVS__
2118#ifndef __SUSV3
2119#define __SUSV3 1
2120#endif
2121#endif
2122#include <dlfcn.h>
2123#endif
2124
2125U_INTERNAL void * U_EXPORT2
2126uprv_dl_open(const char *libName, UErrorCode *status) {
2127  void *ret = NULL;
2128  if(U_FAILURE(*status)) return ret;
2129  ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2130  if(ret==NULL) {
2131#ifdef U_TRACE_DYLOAD
2132    printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2133#endif
2134    *status = U_MISSING_RESOURCE_ERROR;
2135  }
2136  return ret;
2137}
2138
2139U_INTERNAL void U_EXPORT2
2140uprv_dl_close(void *lib, UErrorCode *status) {
2141  if(U_FAILURE(*status)) return;
2142  dlclose(lib);
2143}
2144
2145U_INTERNAL UVoidFunction* U_EXPORT2
2146uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2147  union {
2148      UVoidFunction *fp;
2149      void *vp;
2150  } uret;
2151  uret.fp = NULL;
2152  if(U_FAILURE(*status)) return uret.fp;
2153  uret.vp = dlsym(lib, sym);
2154  if(uret.vp == NULL) {
2155#ifdef U_TRACE_DYLOAD
2156    printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2157#endif
2158    *status = U_MISSING_RESOURCE_ERROR;
2159  }
2160  return uret.fp;
2161}
2162
2163#else
2164
2165/* null (nonexistent) implementation. */
2166
2167U_INTERNAL void * U_EXPORT2
2168uprv_dl_open(const char *libName, UErrorCode *status) {
2169  if(U_FAILURE(*status)) return NULL;
2170  *status = U_UNSUPPORTED_ERROR;
2171  return NULL;
2172}
2173
2174U_INTERNAL void U_EXPORT2
2175uprv_dl_close(void *lib, UErrorCode *status) {
2176  if(U_FAILURE(*status)) return;
2177  *status = U_UNSUPPORTED_ERROR;
2178  return;
2179}
2180
2181
2182U_INTERNAL UVoidFunction* U_EXPORT2
2183uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2184  if(U_SUCCESS(*status)) {
2185    *status = U_UNSUPPORTED_ERROR;
2186  }
2187  return (UVoidFunction*)NULL;
2188}
2189
2190
2191
2192#endif
2193
2194#elif U_PLATFORM_USES_ONLY_WIN32_API
2195
2196U_INTERNAL void * U_EXPORT2
2197uprv_dl_open(const char *libName, UErrorCode *status) {
2198  HMODULE lib = NULL;
2199
2200  if(U_FAILURE(*status)) return NULL;
2201
2202  lib = LoadLibraryA(libName);
2203
2204  if(lib==NULL) {
2205    *status = U_MISSING_RESOURCE_ERROR;
2206  }
2207
2208  return (void*)lib;
2209}
2210
2211U_INTERNAL void U_EXPORT2
2212uprv_dl_close(void *lib, UErrorCode *status) {
2213  HMODULE handle = (HMODULE)lib;
2214  if(U_FAILURE(*status)) return;
2215
2216  FreeLibrary(handle);
2217
2218  return;
2219}
2220
2221
2222U_INTERNAL UVoidFunction* U_EXPORT2
2223uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2224  HMODULE handle = (HMODULE)lib;
2225  UVoidFunction* addr = NULL;
2226
2227  if(U_FAILURE(*status) || lib==NULL) return NULL;
2228
2229  addr = (UVoidFunction*)GetProcAddress(handle, sym);
2230
2231  if(addr==NULL) {
2232    DWORD lastError = GetLastError();
2233    if(lastError == ERROR_PROC_NOT_FOUND) {
2234      *status = U_MISSING_RESOURCE_ERROR;
2235    } else {
2236      *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2237    }
2238  }
2239
2240  return addr;
2241}
2242
2243
2244#else
2245
2246/* No dynamic loading set. */
2247
2248U_INTERNAL void * U_EXPORT2
2249uprv_dl_open(const char *libName, UErrorCode *status) {
2250    (void)libName;
2251    if(U_FAILURE(*status)) return NULL;
2252    *status = U_UNSUPPORTED_ERROR;
2253    return NULL;
2254}
2255
2256U_INTERNAL void U_EXPORT2
2257uprv_dl_close(void *lib, UErrorCode *status) {
2258    (void)lib;
2259    if(U_FAILURE(*status)) return;
2260    *status = U_UNSUPPORTED_ERROR;
2261    return;
2262}
2263
2264
2265U_INTERNAL UVoidFunction* U_EXPORT2
2266uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2267  (void)lib;
2268  (void)sym;
2269  if(U_SUCCESS(*status)) {
2270    *status = U_UNSUPPORTED_ERROR;
2271  }
2272  return (UVoidFunction*)NULL;
2273}
2274
2275#endif /* U_ENABLE_DYLOAD */
2276
2277/*
2278 * Hey, Emacs, please set the following:
2279 *
2280 * Local Variables:
2281 * indent-tabs-mode: nil
2282 * End:
2283 *
2284 */
2285